diff --git a/NEWS b/NEWS index 559a51ba3f..f2497d5cec 100644 --- a/NEWS +++ b/NEWS @@ -1,3 +1,6 @@ +v2.16.1 - xx xxx xxxx +--------------------- + v2.16.0 - 16 Aug 2021 --------------------- - Removed support for 1024-bit Diffie-Hellman key exchange, which is now diff --git a/configure.ac b/configure.ac index 16b32be965..4def2ebd0a 100644 --- a/configure.ac +++ b/configure.ac @@ -13,7 +13,7 @@ # limitations under the License. AC_PREREQ(2.63) -AC_INIT(openvswitch, 2.16.0, bugs@openvswitch.org) +AC_INIT(openvswitch, 2.16.1, bugs@openvswitch.org) AC_CONFIG_SRCDIR([datapath/datapath.c]) AC_CONFIG_MACRO_DIR([m4]) AC_CONFIG_AUX_DIR([build-aux]) diff --git a/debian/changelog b/debian/changelog index 239d210b96..0f521be4d8 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,9 @@ +openvswitch (2.16.1-1) unstable; urgency=low + [ Open vSwitch team ] + * New upstream version + + -- Open vSwitch team Mon, 16 Aug 2021 22:08:13 +0200 + openvswitch (2.16.0-1) unstable; urgency=low * New upstream version diff --git a/include/openvswitch/json.h b/include/openvswitch/json.h index 73b562e03d..0831a9cee1 100644 --- a/include/openvswitch/json.h +++ b/include/openvswitch/json.h @@ -50,7 +50,9 @@ enum json_type { JSON_INTEGER, /* 123. */ JSON_REAL, /* 123.456. */ JSON_STRING, /* "..." */ - JSON_N_TYPES + JSON_N_TYPES, + JSON_SERIALIZED_OBJECT, /* Internal type to hold serialized version of + * data of other types. */ }; const char *json_type_to_string(enum json_type); @@ -70,7 +72,7 @@ struct json { struct json_array array; long long int integer; double real; - char *string; + char *string; /* JSON_STRING or JSON_SERIALIZED_OBJECT. */ }; }; @@ -78,6 +80,7 @@ struct json *json_null_create(void); struct json *json_boolean_create(bool); struct json *json_string_create(const char *); struct json *json_string_create_nocopy(char *); +struct json *json_serialized_object_create(const struct json *); struct json *json_integer_create(long long int); struct json *json_real_create(double); @@ -99,6 +102,7 @@ void json_object_put_format(struct json *, OVS_PRINTF_FORMAT(3, 4); const char *json_string(const struct json *); +const char *json_serialized_object(const struct json *); struct json_array *json_array(const struct json *); struct shash *json_object(const struct json *); bool json_boolean(const struct json *); @@ -125,6 +129,7 @@ struct json *json_parser_finish(struct json_parser *); void json_parser_abort(struct json_parser *); struct json *json_from_string(const char *string); +struct json *json_from_serialized_object(const struct json *); struct json *json_from_file(const char *file_name); struct json *json_from_stream(FILE *stream); diff --git a/lib/dp-packet.h b/lib/dp-packet.h index 08d93c2779..3dc582fbfd 100644 --- a/lib/dp-packet.h +++ b/lib/dp-packet.h @@ -199,6 +199,7 @@ struct dp_packet *dp_packet_clone_data_with_headroom(const void *, size_t, void dp_packet_resize(struct dp_packet *b, size_t new_headroom, size_t new_tailroom); static inline void dp_packet_delete(struct dp_packet *); +static inline void dp_packet_swap(struct dp_packet *, struct dp_packet *); static inline void *dp_packet_at(const struct dp_packet *, size_t offset, size_t size); @@ -256,6 +257,18 @@ dp_packet_delete(struct dp_packet *b) } } +/* Swaps content of two packets. */ +static inline void +dp_packet_swap(struct dp_packet *a, struct dp_packet *b) +{ + ovs_assert(a->source == DPBUF_MALLOC || a->source == DPBUF_STUB); + ovs_assert(b->source == DPBUF_MALLOC || b->source == DPBUF_STUB); + struct dp_packet c = *a; + + *a = *b; + *b = c; +} + /* If 'b' contains at least 'offset + size' bytes of data, returns a pointer to * byte 'offset'. Otherwise, returns a null pointer. */ static inline void * diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c index bddce75b63..f9782b596f 100644 --- a/lib/dpif-netdev.c +++ b/lib/dpif-netdev.c @@ -4061,7 +4061,10 @@ dpif_netdev_execute(struct dpif *dpif, struct dpif_execute *execute) flow_hash_5tuple(execute->flow, 0)); } - dp_packet_batch_init_packet(&pp, execute->packet); + /* Making a copy because the packet might be stolen during the execution + * and caller might still need it. */ + struct dp_packet *packet_clone = dp_packet_clone(execute->packet); + dp_packet_batch_init_packet(&pp, packet_clone); dp_netdev_execute_actions(pmd, &pp, false, execute->flow, execute->actions, execute->actions_len); dp_netdev_pmd_flush_output_packets(pmd, true); @@ -4071,6 +4074,14 @@ dpif_netdev_execute(struct dpif *dpif, struct dpif_execute *execute) dp_netdev_pmd_unref(pmd); } + if (dp_packet_batch_size(&pp)) { + /* Packet wasn't dropped during the execution. Swapping content with + * the original packet, because the caller might expect actions to + * modify it. */ + dp_packet_swap(execute->packet, packet_clone); + dp_packet_delete_batch(&pp, true); + } + return 0; } diff --git a/lib/ipf.c b/lib/ipf.c index d9f781147a..665f40fefe 100644 --- a/lib/ipf.c +++ b/lib/ipf.c @@ -1152,52 +1152,56 @@ ipf_post_execute_reass_pkts(struct ipf *ipf, * NETDEV_MAX_BURST. */ DP_PACKET_BATCH_REFILL_FOR_EACH (pb_idx, pb_cnt, pkt, pb) { if (rp && pkt == rp->list->reass_execute_ctx) { + const struct ipf_frag *frag_0 = &rp->list->frag_list[0]; + void *l4_frag = dp_packet_l4(frag_0->pkt); + void *l4_reass = dp_packet_l4(pkt); + memcpy(l4_frag, l4_reass, dp_packet_l4_size(frag_0->pkt)); + for (int i = 0; i <= rp->list->last_inuse_idx; i++) { - rp->list->frag_list[i].pkt->md.ct_label = pkt->md.ct_label; - rp->list->frag_list[i].pkt->md.ct_mark = pkt->md.ct_mark; - rp->list->frag_list[i].pkt->md.ct_state = pkt->md.ct_state; - rp->list->frag_list[i].pkt->md.ct_zone = pkt->md.ct_zone; - rp->list->frag_list[i].pkt->md.ct_orig_tuple_ipv6 = + const struct ipf_frag *frag_i = &rp->list->frag_list[i]; + + frag_i->pkt->md.ct_label = pkt->md.ct_label; + frag_i->pkt->md.ct_mark = pkt->md.ct_mark; + frag_i->pkt->md.ct_state = pkt->md.ct_state; + frag_i->pkt->md.ct_zone = pkt->md.ct_zone; + frag_i->pkt->md.ct_orig_tuple_ipv6 = pkt->md.ct_orig_tuple_ipv6; if (pkt->md.ct_orig_tuple_ipv6) { - rp->list->frag_list[i].pkt->md.ct_orig_tuple.ipv6 = + frag_i->pkt->md.ct_orig_tuple.ipv6 = pkt->md.ct_orig_tuple.ipv6; } else { - rp->list->frag_list[i].pkt->md.ct_orig_tuple.ipv4 = + frag_i->pkt->md.ct_orig_tuple.ipv4 = pkt->md.ct_orig_tuple.ipv4; } - } - - const struct ipf_frag *frag_0 = &rp->list->frag_list[0]; - void *l4_frag = dp_packet_l4(frag_0->pkt); - void *l4_reass = dp_packet_l4(pkt); - memcpy(l4_frag, l4_reass, dp_packet_l4_size(frag_0->pkt)); - - if (v6) { - struct ovs_16aligned_ip6_hdr *l3_frag - = dp_packet_l3(frag_0->pkt); - struct ovs_16aligned_ip6_hdr *l3_reass = dp_packet_l3(pkt); - l3_frag->ip6_src = l3_reass->ip6_src; - l3_frag->ip6_dst = l3_reass->ip6_dst; - } else { - struct ip_header *l3_frag = dp_packet_l3(frag_0->pkt); - struct ip_header *l3_reass = dp_packet_l3(pkt); - if (!dp_packet_hwol_is_ipv4(frag_0->pkt)) { - ovs_be32 reass_ip = - get_16aligned_be32(&l3_reass->ip_src); - ovs_be32 frag_ip = - get_16aligned_be32(&l3_frag->ip_src); - - l3_frag->ip_csum = recalc_csum32(l3_frag->ip_csum, - frag_ip, reass_ip); - reass_ip = get_16aligned_be32(&l3_reass->ip_dst); - frag_ip = get_16aligned_be32(&l3_frag->ip_dst); - l3_frag->ip_csum = recalc_csum32(l3_frag->ip_csum, - frag_ip, reass_ip); + if (v6) { + struct ovs_16aligned_ip6_hdr *l3_frag + = dp_packet_l3(frag_i->pkt); + struct ovs_16aligned_ip6_hdr *l3_reass + = dp_packet_l3(pkt); + l3_frag->ip6_src = l3_reass->ip6_src; + l3_frag->ip6_dst = l3_reass->ip6_dst; + } else { + struct ip_header *l3_frag = dp_packet_l3(frag_i->pkt); + struct ip_header *l3_reass = dp_packet_l3(pkt); + if (!dp_packet_hwol_is_ipv4(frag_i->pkt)) { + ovs_be32 reass_ip = + get_16aligned_be32(&l3_reass->ip_src); + ovs_be32 frag_ip = + get_16aligned_be32(&l3_frag->ip_src); + + l3_frag->ip_csum = recalc_csum32(l3_frag->ip_csum, + frag_ip, + reass_ip); + reass_ip = get_16aligned_be32(&l3_reass->ip_dst); + frag_ip = get_16aligned_be32(&l3_frag->ip_dst); + l3_frag->ip_csum = recalc_csum32(l3_frag->ip_csum, + frag_ip, + reass_ip); + } + + l3_frag->ip_src = l3_reass->ip_src; + l3_frag->ip_dst = l3_reass->ip_dst; } - - l3_frag->ip_src = l3_reass->ip_src; - l3_frag->ip_dst = l3_reass->ip_dst; } ipf_completed_list_add(&ipf->frag_complete_list, rp->list); diff --git a/lib/json.c b/lib/json.c index 32d25003b8..0baf7c622c 100644 --- a/lib/json.c +++ b/lib/json.c @@ -146,6 +146,7 @@ json_type_to_string(enum json_type type) case JSON_STRING: return "string"; + case JSON_SERIALIZED_OBJECT: case JSON_N_TYPES: default: return ""; @@ -180,6 +181,14 @@ json_string_create(const char *s) return json_string_create_nocopy(xstrdup(s)); } +struct json * +json_serialized_object_create(const struct json *src) +{ + struct json *json = json_create(JSON_SERIALIZED_OBJECT); + json->string = json_to_string(src, JSSF_SORT); + return json; +} + struct json * json_array_create_empty(void) { @@ -309,6 +318,13 @@ json_string(const struct json *json) return json->string; } +const char * +json_serialized_object(const struct json *json) +{ + ovs_assert(json->type == JSON_SERIALIZED_OBJECT); + return json->string; +} + struct json_array * json_array(const struct json *json) { @@ -362,6 +378,7 @@ json_destroy(struct json *json) break; case JSON_STRING: + case JSON_SERIALIZED_OBJECT: free(json->string); break; @@ -422,6 +439,9 @@ json_deep_clone(const struct json *json) case JSON_STRING: return json_string_create(json->string); + case JSON_SERIALIZED_OBJECT: + return json_serialized_object_create(json); + case JSON_NULL: case JSON_FALSE: case JSON_TRUE: @@ -521,6 +541,7 @@ json_hash(const struct json *json, size_t basis) return json_hash_array(&json->array, basis); case JSON_STRING: + case JSON_SERIALIZED_OBJECT: return hash_string(json->string, basis); case JSON_NULL: @@ -596,6 +617,7 @@ json_equal(const struct json *a, const struct json *b) return json_equal_array(&a->array, &b->array); case JSON_STRING: + case JSON_SERIALIZED_OBJECT: return !strcmp(a->string, b->string); case JSON_NULL: @@ -1072,6 +1094,14 @@ json_from_string(const char *string) return json_parser_finish(p); } +/* Parses data of JSON_SERIALIZED_OBJECT to the real JSON. */ +struct json * +json_from_serialized_object(const struct json *json) +{ + ovs_assert(json->type == JSON_SERIALIZED_OBJECT); + return json_from_string(json->string); +} + /* Reads the file named 'file_name', parses its contents as a JSON object or * array, and returns a newly allocated 'struct json'. The caller must free * the returned structure with json_destroy() when it is no longer needed. @@ -1563,6 +1593,10 @@ json_serialize(const struct json *json, struct json_serializer *s) json_serialize_string(json->string, ds); break; + case JSON_SERIALIZED_OBJECT: + ds_put_cstr(ds, json->string); + break; + case JSON_N_TYPES: default: OVS_NOT_REACHED(); @@ -1696,14 +1730,30 @@ json_serialize_string(const char *string, struct ds *ds) { uint8_t c; uint8_t c2; + size_t count; const char *escape; + const char *start; ds_put_char(ds, '"'); + count = 0; + start = string; while ((c = *string++) != '\0') { - escape = chars_escaping[c]; - while ((c2 = *escape++) != '\0') { - ds_put_char(ds, c2); + if (c >= ' ' && c != '"' && c != '\\') { + count++; + } else { + if (count) { + ds_put_buffer(ds, start, count); + count = 0; + } + start = string; + escape = chars_escaping[c]; + while ((c2 = *escape++) != '\0') { + ds_put_char(ds, c2); + } } } + if (count) { + ds_put_buffer(ds, start, count); + } ds_put_char(ds, '"'); } diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c index 45a96b9be2..ca92c947a2 100644 --- a/lib/netdev-dpdk.c +++ b/lib/netdev-dpdk.c @@ -961,14 +961,6 @@ dpdk_eth_dev_port_config(struct netdev_dpdk *dev, int n_rxq, int n_txq) rte_eth_dev_info_get(dev->port_id, &info); - /* As of DPDK 19.11, it is not allowed to set a mq_mode for - * virtio PMD driver. */ - if (!strcmp(info.driver_name, "net_virtio")) { - conf.rxmode.mq_mode = ETH_MQ_RX_NONE; - } else { - conf.rxmode.mq_mode = ETH_MQ_RX_RSS; - } - /* As of DPDK 17.11.1 a few PMDs require to explicitly enable * scatter to support jumbo RX. * Setting scatter for the device is done after checking for @@ -1000,6 +992,11 @@ dpdk_eth_dev_port_config(struct netdev_dpdk *dev, int n_rxq, int n_txq) /* Limit configured rss hash functions to only those supported * by the eth device. */ conf.rx_adv_conf.rss_conf.rss_hf &= info.flow_type_rss_offloads; + if (conf.rx_adv_conf.rss_conf.rss_hf == 0) { + conf.rxmode.mq_mode = ETH_MQ_RX_NONE; + } else { + conf.rxmode.mq_mode = ETH_MQ_RX_RSS; + } /* A device may report more queues than it makes available (this has * been observed for Intel xl710, which reserves some of them for diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c index 60dd138914..97bd21be4a 100644 --- a/lib/netdev-linux.c +++ b/lib/netdev-linux.c @@ -627,6 +627,7 @@ netdev_linux_notify_sock(void) if (!error) { size_t i; + nl_sock_listen_all_nsid(sock, true); for (i = 0; i < ARRAY_SIZE(mcgroups); i++) { error = nl_sock_join_mcgroup(sock, mcgroups[i]); if (error) { @@ -636,7 +637,6 @@ netdev_linux_notify_sock(void) } } } - nl_sock_listen_all_nsid(sock, true); ovsthread_once_done(&once); } diff --git a/lib/odp-util.c b/lib/odp-util.c index 7729a90608..fbdfc7ad83 100644 --- a/lib/odp-util.c +++ b/lib/odp-util.c @@ -2941,7 +2941,7 @@ odp_nsh_key_from_attr__(const struct nlattr *attr, bool is_mask, const struct ovs_nsh_key_md1 *md1 = nl_attr_get(a); has_md1 = true; memcpy(nsh->context, md1->context, sizeof md1->context); - if (len == 2 * sizeof(*md1)) { + if (nsh_mask && (len == 2 * sizeof *md1)) { const struct ovs_nsh_key_md1 *md1_mask = md1 + 1; memcpy(nsh_mask->context, md1_mask->context, sizeof(*md1_mask)); @@ -4618,7 +4618,7 @@ odp_flow_format(const struct nlattr *key, size_t key_len, } ds_put_char(ds, ')'); } - if (!has_ethtype_key) { + if (!has_ethtype_key && mask) { const struct nlattr *ma = nl_attr_find__(mask, mask_len, OVS_KEY_ATTR_ETHERTYPE); if (ma) { diff --git a/lib/pcap-file.c b/lib/pcap-file.c index b30a11c24b..41835f6f4d 100644 --- a/lib/pcap-file.c +++ b/lib/pcap-file.c @@ -89,6 +89,7 @@ ovs_pcap_open(const char *file_name, const char *mode) : mode[0] == 'w' ? "writing" : "appending"), ovs_strerror(errno)); + free(p_file); return NULL; } diff --git a/ovsdb/monitor.c b/ovsdb/monitor.c index 532dedcb64..ab814cf20e 100644 --- a/ovsdb/monitor.c +++ b/ovsdb/monitor.c @@ -1231,6 +1231,15 @@ ovsdb_monitor_get_update( condition, ovsdb_monitor_compose_row_update2); if (!condition || !condition->conditional) { + if (json) { + struct json *json_serialized; + + /* Pre-serializing the object to avoid doing this + * for every client. */ + json_serialized = json_serialized_object_create(json); + json_destroy(json); + json = json_serialized; + } ovsdb_monitor_json_cache_insert(dbmon, version, mcs, json); } diff --git a/ovsdb/ovsdb-tool.c b/ovsdb/ovsdb-tool.c index 05a0223e71..d4a9e34cc4 100644 --- a/ovsdb/ovsdb-tool.c +++ b/ovsdb/ovsdb-tool.c @@ -919,7 +919,8 @@ print_raft_header(const struct raft_header *h, if (!uuid_is_zero(&h->snap.eid)) { printf(" prev_eid: %04x\n", uuid_prefix(&h->snap.eid, 4)); } - print_data("prev_", h->snap.data, schemap, names); + print_data("prev_", raft_entry_get_parsed_data(&h->snap), + schemap, names); } } @@ -973,11 +974,13 @@ raft_header_to_standalone_log(const struct raft_header *h, struct ovsdb_log *db_log_data) { if (h->snap_index) { - if (!h->snap.data || json_array(h->snap.data)->n != 2) { + const struct json *data = raft_entry_get_parsed_data(&h->snap); + + if (!data || json_array(data)->n != 2) { ovs_fatal(0, "Incorrect raft header data array length"); } - struct json_array *pa = json_array(h->snap.data); + struct json_array *pa = json_array(data); struct json *schema_json = pa->elems[0]; struct ovsdb_error *error = NULL; @@ -1373,7 +1376,7 @@ do_check_cluster(struct ovs_cmdl_context *ctx) } struct raft_entry *e = &s->entries[log_idx]; e->term = r->term; - e->data = r->entry.data; + raft_entry_set_parsed_data_nocopy(e, r->entry.data); e->eid = r->entry.eid; e->servers = r->entry.servers; break; diff --git a/ovsdb/raft-private.c b/ovsdb/raft-private.c index 26d39a087f..30760233ee 100644 --- a/ovsdb/raft-private.c +++ b/ovsdb/raft-private.c @@ -18,11 +18,14 @@ #include "raft-private.h" +#include "coverage.h" #include "openvswitch/dynamic-string.h" #include "ovsdb-error.h" #include "ovsdb-parser.h" #include "socket-util.h" #include "sset.h" + +COVERAGE_DEFINE(raft_entry_serialize); /* Addresses of Raft servers. */ @@ -281,7 +284,8 @@ void raft_entry_clone(struct raft_entry *dst, const struct raft_entry *src) { dst->term = src->term; - dst->data = json_nullable_clone(src->data); + dst->data.full_json = json_nullable_clone(src->data.full_json); + dst->data.serialized = json_nullable_clone(src->data.serialized); dst->eid = src->eid; dst->servers = json_nullable_clone(src->servers); dst->election_timer = src->election_timer; @@ -291,7 +295,8 @@ void raft_entry_uninit(struct raft_entry *e) { if (e) { - json_destroy(e->data); + json_destroy(e->data.full_json); + json_destroy(e->data.serialized); json_destroy(e->servers); } } @@ -301,8 +306,9 @@ raft_entry_to_json(const struct raft_entry *e) { struct json *json = json_object_create(); raft_put_uint64(json, "term", e->term); - if (e->data) { - json_object_put(json, "data", json_clone(e->data)); + if (raft_entry_has_data(e)) { + json_object_put(json, "data", + json_clone(raft_entry_get_serialized_data(e))); json_object_put_format(json, "eid", UUID_FMT, UUID_ARGS(&e->eid)); } if (e->servers) { @@ -323,9 +329,10 @@ raft_entry_from_json(struct json *json, struct raft_entry *e) struct ovsdb_parser p; ovsdb_parser_init(&p, json, "raft log entry"); e->term = raft_parse_required_uint64(&p, "term"); - e->data = json_nullable_clone( + raft_entry_set_parsed_data(e, ovsdb_parser_member(&p, "data", OP_OBJECT | OP_ARRAY | OP_OPTIONAL)); - e->eid = e->data ? raft_parse_required_uuid(&p, "eid") : UUID_ZERO; + e->eid = raft_entry_has_data(e) + ? raft_parse_required_uuid(&p, "eid") : UUID_ZERO; e->servers = json_nullable_clone( ovsdb_parser_member(&p, "servers", OP_OBJECT | OP_OPTIONAL)); if (e->servers) { @@ -344,9 +351,72 @@ bool raft_entry_equals(const struct raft_entry *a, const struct raft_entry *b) { return (a->term == b->term - && json_equal(a->data, b->data) && uuid_equals(&a->eid, &b->eid) - && json_equal(a->servers, b->servers)); + && json_equal(a->servers, b->servers) + && json_equal(raft_entry_get_parsed_data(a), + raft_entry_get_parsed_data(b))); +} + +bool +raft_entry_has_data(const struct raft_entry *e) +{ + return e->data.full_json || e->data.serialized; +} + +static void +raft_entry_data_serialize(struct raft_entry *e) +{ + if (!raft_entry_has_data(e) || e->data.serialized) { + return; + } + COVERAGE_INC(raft_entry_serialize); + e->data.serialized = json_serialized_object_create(e->data.full_json); +} + +void +raft_entry_set_parsed_data_nocopy(struct raft_entry *e, struct json *json) +{ + ovs_assert(!json || json->type != JSON_SERIALIZED_OBJECT); + e->data.full_json = json; + e->data.serialized = NULL; +} + +void +raft_entry_set_parsed_data(struct raft_entry *e, const struct json *json) +{ + raft_entry_set_parsed_data_nocopy(e, json_nullable_clone(json)); +} + +/* Returns a pointer to the fully parsed json object of the data. + * Caller takes the ownership of the result. + * + * Entry will no longer contain a fully parsed json object. + * Subsequent calls for the same raft entry will return NULL. */ +struct json * OVS_WARN_UNUSED_RESULT +raft_entry_steal_parsed_data(struct raft_entry *e) +{ + /* Ensure that serialized version exists. */ + raft_entry_data_serialize(e); + + struct json *json = e->data.full_json; + e->data.full_json = NULL; + + return json; +} + +/* Returns a pointer to the fully parsed json object of the data, if any. */ +const struct json * +raft_entry_get_parsed_data(const struct raft_entry *e) +{ + return e->data.full_json; +} + +/* Returns a pointer to the JSON_SERIALIZED_OBJECT of the data. */ +const struct json * +raft_entry_get_serialized_data(const struct raft_entry *e) +{ + raft_entry_data_serialize(CONST_CAST(struct raft_entry *, e)); + return e->data.serialized; } void @@ -402,8 +472,8 @@ raft_header_from_json__(struct raft_header *h, struct ovsdb_parser *p) * present, all of them must be. */ h->snap_index = raft_parse_optional_uint64(p, "prev_index"); if (h->snap_index) { - h->snap.data = json_nullable_clone( - ovsdb_parser_member(p, "prev_data", OP_ANY)); + raft_entry_set_parsed_data( + &h->snap, ovsdb_parser_member(p, "prev_data", OP_ANY)); h->snap.eid = raft_parse_required_uuid(p, "prev_eid"); h->snap.term = raft_parse_required_uint64(p, "prev_term"); h->snap.election_timer = raft_parse_optional_uint64( @@ -455,8 +525,9 @@ raft_header_to_json(const struct raft_header *h) if (h->snap_index) { raft_put_uint64(json, "prev_index", h->snap_index); raft_put_uint64(json, "prev_term", h->snap.term); - if (h->snap.data) { - json_object_put(json, "prev_data", json_clone(h->snap.data)); + if (raft_entry_has_data(&h->snap)) { + json_object_put(json, "prev_data", + json_clone(raft_entry_get_serialized_data(&h->snap))); } json_object_put_format(json, "prev_eid", UUID_FMT, UUID_ARGS(&h->snap.eid)); diff --git a/ovsdb/raft-private.h b/ovsdb/raft-private.h index a69e37e5c2..48c6df511f 100644 --- a/ovsdb/raft-private.h +++ b/ovsdb/raft-private.h @@ -118,7 +118,10 @@ void raft_servers_format(const struct hmap *servers, struct ds *ds); * entry. */ struct raft_entry { uint64_t term; - struct json *data; + struct { + struct json *full_json; /* Fully parsed JSON object. */ + struct json *serialized; /* JSON_SERIALIZED_OBJECT version of data. */ + } data; struct uuid eid; struct json *servers; uint64_t election_timer; @@ -130,6 +133,13 @@ struct json *raft_entry_to_json(const struct raft_entry *); struct ovsdb_error *raft_entry_from_json(struct json *, struct raft_entry *) OVS_WARN_UNUSED_RESULT; bool raft_entry_equals(const struct raft_entry *, const struct raft_entry *); +bool raft_entry_has_data(const struct raft_entry *); +void raft_entry_set_parsed_data(struct raft_entry *, const struct json *); +void raft_entry_set_parsed_data_nocopy(struct raft_entry *, struct json *); +struct json *raft_entry_steal_parsed_data(struct raft_entry *) + OVS_WARN_UNUSED_RESULT; +const struct json *raft_entry_get_parsed_data(const struct raft_entry *); +const struct json *raft_entry_get_serialized_data(const struct raft_entry *); /* On disk data serialization and deserialization. */ diff --git a/ovsdb/raft.c b/ovsdb/raft.c index 2fb5156519..ce40c5bc07 100644 --- a/ovsdb/raft.c +++ b/ovsdb/raft.c @@ -494,11 +494,11 @@ raft_create_cluster(const char *file_name, const char *name, .snap_index = index++, .snap = { .term = term, - .data = json_nullable_clone(data), .eid = uuid_random(), .servers = json_object_create(), }, }; + raft_entry_set_parsed_data(&h.snap, data); shash_add_nocopy(json_object(h.snap.servers), xasprintf(UUID_FMT, UUID_ARGS(&h.sid)), json_string_create(local_address)); @@ -727,10 +727,10 @@ raft_add_entry(struct raft *raft, uint64_t index = raft->log_end++; struct raft_entry *entry = &raft->entries[index - raft->log_start]; entry->term = term; - entry->data = data; entry->eid = eid ? *eid : UUID_ZERO; entry->servers = servers; entry->election_timer = election_timer; + raft_entry_set_parsed_data_nocopy(entry, data); return index; } @@ -741,13 +741,16 @@ raft_write_entry(struct raft *raft, uint64_t term, struct json *data, const struct uuid *eid, struct json *servers, uint64_t election_timer) { + uint64_t index = raft_add_entry(raft, term, data, eid, servers, + election_timer); + const struct json *entry_data = raft_entry_get_serialized_data( + &raft->entries[index - raft->log_start]); struct raft_record r = { .type = RAFT_REC_ENTRY, .term = term, .entry = { - .index = raft_add_entry(raft, term, data, eid, servers, - election_timer), - .data = data, + .index = index, + .data = CONST_CAST(struct json *, entry_data), .servers = servers, .election_timer = election_timer, .eid = eid ? *eid : UUID_ZERO, @@ -2161,7 +2164,7 @@ raft_get_eid(const struct raft *raft, uint64_t index) { for (; index >= raft->log_start; index--) { const struct raft_entry *e = raft_get_entry(raft, index); - if (e->data) { + if (raft_entry_has_data(e)) { return &e->eid; } } @@ -2826,8 +2829,8 @@ raft_truncate(struct raft *raft, uint64_t new_end) return servers_changed; } -static const struct json * -raft_peek_next_entry(struct raft *raft, struct uuid *eid) +static const struct raft_entry * +raft_peek_next_entry(struct raft *raft) { /* Invariant: log_start - 2 <= last_applied <= commit_index < log_end. */ ovs_assert(raft->log_start <= raft->last_applied + 2); @@ -2839,32 +2842,20 @@ raft_peek_next_entry(struct raft *raft, struct uuid *eid) } if (raft->log_start == raft->last_applied + 2) { - *eid = raft->snap.eid; - return raft->snap.data; + return &raft->snap; } while (raft->last_applied < raft->commit_index) { const struct raft_entry *e = raft_get_entry(raft, raft->last_applied + 1); - if (e->data) { - *eid = e->eid; - return e->data; + if (raft_entry_has_data(e)) { + return e; } raft->last_applied++; } return NULL; } -static const struct json * -raft_get_next_entry(struct raft *raft, struct uuid *eid) -{ - const struct json *data = raft_peek_next_entry(raft, eid); - if (data) { - raft->last_applied++; - } - return data; -} - /* Updates commit index in raft log. If commit index is already up-to-date * it does nothing and return false, otherwise, returns true. */ static bool @@ -2878,7 +2869,7 @@ raft_update_commit_index(struct raft *raft, uint64_t new_commit_index) while (raft->commit_index < new_commit_index) { uint64_t index = ++raft->commit_index; const struct raft_entry *e = raft_get_entry(raft, index); - if (e->data) { + if (raft_entry_has_data(e)) { struct raft_command *cmd = raft_find_command_by_eid(raft, &e->eid); if (cmd) { @@ -3059,7 +3050,9 @@ raft_handle_append_entries(struct raft *raft, for (; i < n_entries; i++) { const struct raft_entry *e = &entries[i]; error = raft_write_entry(raft, e->term, - json_nullable_clone(e->data), &e->eid, + json_nullable_clone( + raft_entry_get_parsed_data(e)), + &e->eid, json_nullable_clone(e->servers), e->election_timer); if (error) { @@ -3314,20 +3307,29 @@ bool raft_has_next_entry(const struct raft *raft_) { struct raft *raft = CONST_CAST(struct raft *, raft_); - struct uuid eid; - return raft_peek_next_entry(raft, &eid) != NULL; + return raft_peek_next_entry(raft) != NULL; } /* Returns the next log entry or snapshot from 'raft', or NULL if there are - * none left to read. Stores the entry ID of the log entry in '*eid'. Stores - * true in '*is_snapshot' if the returned data is a snapshot, false if it is a - * log entry. */ -const struct json * -raft_next_entry(struct raft *raft, struct uuid *eid, bool *is_snapshot) + * none left to read. Stores the entry ID of the log entry in '*eid'. + * + * The caller takes ownership of the result. */ +struct json * OVS_WARN_UNUSED_RESULT +raft_next_entry(struct raft *raft, struct uuid *eid) { - const struct json *data = raft_get_next_entry(raft, eid); - *is_snapshot = data == raft->snap.data; - return data; + const struct raft_entry *e = raft_peek_next_entry(raft); + + if (!e) { + return NULL; + } + + raft->last_applied++; + *eid = e->eid; + + /* DB will only read each entry once, so we don't need to store the fully + * parsed json object any longer. The serialized version is sufficient + * for sending to other cluster members or writing to the log. */ + return raft_entry_steal_parsed_data(CONST_CAST(struct raft_entry *, e)); } /* Returns the log index of the last-read snapshot or log entry. */ @@ -3420,6 +3422,7 @@ raft_send_install_snapshot_request(struct raft *raft, const struct raft_server *s, const char *comment) { + const struct json *data = raft_entry_get_serialized_data(&raft->snap); union raft_rpc rpc = { .install_snapshot_request = { .common = { @@ -3432,7 +3435,7 @@ raft_send_install_snapshot_request(struct raft *raft, .last_term = raft->snap.term, .last_servers = raft->snap.servers, .last_eid = raft->snap.eid, - .data = raft->snap.data, + .data = CONST_CAST(struct json *, data), .election_timer = raft->election_timer, /* use latest value */ } }; @@ -3980,6 +3983,10 @@ raft_write_snapshot(struct raft *raft, struct ovsdb_log *log, uint64_t new_log_start, const struct raft_entry *new_snapshot) { + /* Ensure that new snapshot contains serialized data object, so it will + * not be allocated while serializing the on-stack raft header object. */ + ovs_assert(raft_entry_get_serialized_data(new_snapshot)); + struct raft_header h = { .sid = raft->sid, .cid = raft->cid, @@ -3998,12 +4005,13 @@ raft_write_snapshot(struct raft *raft, struct ovsdb_log *log, /* Write log records. */ for (uint64_t index = new_log_start; index < raft->log_end; index++) { const struct raft_entry *e = &raft->entries[index - raft->log_start]; + const struct json *log_data = raft_entry_get_serialized_data(e); struct raft_record r = { .type = RAFT_REC_ENTRY, .term = e->term, .entry = { .index = index, - .data = e->data, + .data = CONST_CAST(struct json *, log_data), .servers = e->servers, .election_timer = e->election_timer, .eid = e->eid, @@ -4093,19 +4101,21 @@ raft_handle_install_snapshot_request__( /* Case 3: The new snapshot starts past the end of our current log, so * discard all of our current log. */ - const struct raft_entry new_snapshot = { + struct raft_entry new_snapshot = { .term = rq->last_term, - .data = rq->data, .eid = rq->last_eid, - .servers = rq->last_servers, + .servers = json_clone(rq->last_servers), .election_timer = rq->election_timer, }; + raft_entry_set_parsed_data(&new_snapshot, rq->data); + struct ovsdb_error *error = raft_save_snapshot(raft, new_log_start, &new_snapshot); if (error) { char *error_s = ovsdb_error_to_string_free(error); VLOG_WARN("could not save snapshot: %s", error_s); free(error_s); + raft_entry_uninit(&new_snapshot); return false; } @@ -4120,7 +4130,7 @@ raft_handle_install_snapshot_request__( } raft_entry_uninit(&raft->snap); - raft_entry_clone(&raft->snap, &new_snapshot); + raft->snap = new_snapshot; raft_get_servers_from_log(raft, VLL_INFO); raft_get_election_timer_from_log(raft); @@ -4265,11 +4275,12 @@ raft_store_snapshot(struct raft *raft, const struct json *new_snapshot_data) uint64_t new_log_start = raft->last_applied + 1; struct raft_entry new_snapshot = { .term = raft_get_term(raft, new_log_start - 1), - .data = json_clone(new_snapshot_data), .eid = *raft_get_eid(raft, new_log_start - 1), .servers = json_clone(raft_servers_for_index(raft, new_log_start - 1)), .election_timer = raft->election_timer, }; + raft_entry_set_parsed_data(&new_snapshot, new_snapshot_data); + struct ovsdb_error *error = raft_save_snapshot(raft, new_log_start, &new_snapshot); if (error) { @@ -4286,6 +4297,9 @@ raft_store_snapshot(struct raft *raft, const struct json *new_snapshot_data) memmove(&raft->entries[0], &raft->entries[new_log_start - raft->log_start], (raft->log_end - new_log_start) * sizeof *raft->entries); raft->log_start = new_log_start; + /* It's a snapshot of the current database state, ovsdb-server will not + * read it back. Destroying the parsed json object to not waste memory. */ + json_destroy(raft_entry_steal_parsed_data(&raft->snap)); return NULL; } diff --git a/ovsdb/raft.h b/ovsdb/raft.h index 3545c41c2c..599bc0ae86 100644 --- a/ovsdb/raft.h +++ b/ovsdb/raft.h @@ -132,8 +132,8 @@ bool raft_left(const struct raft *); bool raft_failed(const struct raft *); /* Reading snapshots and log entries. */ -const struct json *raft_next_entry(struct raft *, struct uuid *eid, - bool *is_snapshot); +struct json *raft_next_entry(struct raft *, struct uuid *eid) + OVS_WARN_UNUSED_RESULT; bool raft_has_next_entry(const struct raft *); uint64_t raft_get_applied_index(const struct raft *); diff --git a/ovsdb/storage.c b/ovsdb/storage.c index d727b1eacd..9e32efe582 100644 --- a/ovsdb/storage.c +++ b/ovsdb/storage.c @@ -268,9 +268,7 @@ ovsdb_storage_read(struct ovsdb_storage *storage, struct json *schema_json = NULL; struct json *txn_json = NULL; if (storage->raft) { - bool is_snapshot; - json = json_nullable_clone( - raft_next_entry(storage->raft, txnid, &is_snapshot)); + json = raft_next_entry(storage->raft, txnid); if (!json) { return NULL; } else if (json->type != JSON_ARRAY || json->array.n != 2) { diff --git a/tests/ofproto-dpif.at b/tests/ofproto-dpif.at index 956a69e1fa..1dad6f62c6 100644 --- a/tests/ofproto-dpif.at +++ b/tests/ofproto-dpif.at @@ -9695,6 +9695,26 @@ OFPST_TABLE reply (OF1.3) (xid=0x2): OVS_VSWITCHD_STOP AT_CLEANUP +AT_SETUP([ofproto-dpif packet-out table meter drop]) +OVS_VSWITCHD_START +add_of_ports br0 1 2 + +AT_CHECK([ovs-ofctl -O OpenFlow13 add-meter br0 'meter=1 pktps bands=type=drop rate=1']) +AT_CHECK([ovs-ofctl -O OpenFlow13 add-flow br0 'in_port=1 action=meter:1,output:2']) + +ovs-ofctl -O OpenFlow13 packet-out br0 "in_port=1 packet=50540000000a50540000000908004500001c000000000011a4cd0a0101010a0101020001000400080000 actions=resubmit(,0)" +ovs-ofctl -O OpenFlow13 packet-out br0 "in_port=1 packet=50540000000a50540000000908004500001c000000000011a4cd0a0101010a0101020001000400080000 actions=resubmit(,0)" + +# Check that vswitchd hasn't crashed by dumping the meter added above +AT_CHECK([ovs-ofctl -O OpenFlow13 dump-meters br0 | ofctl_strip], [0], [dnl +OFPST_METER_CONFIG reply (OF1.3): +meter=1 pktps bands= +type=drop rate=1 +]) + +OVS_VSWITCHD_STOP +AT_CLEANUP + AT_SETUP([ofproto-dpif - ICMPv6]) OVS_VSWITCHD_START add_of_ports br0 1 diff --git a/tests/system-traffic.at b/tests/system-traffic.at index f400cfabc9..c4442c183f 100644 --- a/tests/system-traffic.at +++ b/tests/system-traffic.at @@ -3305,6 +3305,46 @@ NS_CHECK_EXEC([at_ns0], [ping6 -s 3200 -q -c 3 -i 0.3 -w 2 fc00::2 | FORMAT_PING OVS_TRAFFIC_VSWITCHD_STOP AT_CLEANUP +AT_SETUP([conntrack - IPv4 Fragmentation + NAT]) +AT_SKIP_IF([test $HAVE_TCPDUMP = no]) +CHECK_CONNTRACK() + +OVS_TRAFFIC_VSWITCHD_START( + [set-fail-mode br0 secure -- ]) + +ADD_NAMESPACES(at_ns0, at_ns1) + +ADD_VETH(p0, at_ns0, br0, "10.2.1.1/24") +ADD_VETH(p1, at_ns1, br0, "10.2.1.2/24") + +dnl Create a dummy route for NAT +NS_CHECK_EXEC([at_ns1], [ip addr add 10.1.1.2/32 dev lo]) +NS_CHECK_EXEC([at_ns0], [ip route add 10.1.1.0/24 via 10.2.1.2]) +NS_CHECK_EXEC([at_ns1], [ip route add 10.1.1.0/24 via 10.2.1.1]) + +dnl Solely for debugging when things go wrong +NS_EXEC([at_ns0], [tcpdump -l -n -xx -U -i p0 -w p0.pcap >tcpdump.out 2>/dev/null &]) +NS_EXEC([at_ns1], [tcpdump -l -n -xx -U -i p1 -w p1.pcap >tcpdump.out 2>/dev/null &]) + +AT_DATA([flows.txt], [dnl +table=0,arp,actions=normal +table=0,ct_state=-trk,ip,in_port=ovs-p0, actions=ct(table=1, nat) +table=0,ct_state=-trk,ip,in_port=ovs-p1, actions=ct(table=1, nat) +table=1,ct_state=+trk+new,ip,in_port=ovs-p0, actions=ct(commit, nat(src=10.1.1.1)),ovs-p1 +table=1,ct_state=+trk+est,ip,in_port=ovs-p0, actions=ovs-p1 +table=1,ct_state=+trk+est,ip,in_port=ovs-p1, actions=ovs-p0 +]) + +AT_CHECK([ovs-ofctl add-flows br0 flows.txt]) + +dnl Check connectivity +NS_CHECK_EXEC([at_ns0], [ping -c 1 10.1.1.2 -M dont -s 4500 | FORMAT_PING], [0], [dnl +1 packets transmitted, 1 received, 0% packet loss, time 0ms +]) + +OVS_TRAFFIC_VSWITCHD_STOP +AT_CLEANUP + AT_SETUP([conntrack - resubmit to ct multiple times]) CHECK_CONNTRACK() diff --git a/tests/test-json.c b/tests/test-json.c index a7ee595e0b..072a537252 100644 --- a/tests/test-json.c +++ b/tests/test-json.c @@ -22,6 +22,8 @@ #include #include #include "ovstest.h" +#include "random.h" +#include "timeval.h" #include "util.h" /* --pretty: If set, the JSON output is pretty-printed, instead of printed as @@ -157,3 +159,69 @@ test_json_main(int argc, char *argv[]) } OVSTEST_REGISTER("test-json", test_json_main); + +static void +json_string_benchmark_main(int argc OVS_UNUSED, char *argv[] OVS_UNUSED) +{ + struct { + int n; + int quote_probablility; + int special_probability; + int iter; + } configs[] = { + { 100000, 0, 0, 1000, }, + { 100000, 2, 1, 1000, }, + { 100000, 10, 1, 1000, }, + { 10000000, 0, 0, 100, }, + { 10000000, 2, 1, 100, }, + { 10000000, 10, 1, 100, }, + { 100000000, 0, 0, 10. }, + { 100000000, 2, 1, 10, }, + { 100000000, 10, 1, 10, }, + }; + + printf(" SIZE Q S TIME\n"); + printf("--------------------------------------\n"); + + for (int i = 0; i < ARRAY_SIZE(configs); i++) { + int iter = configs[i].iter; + int n = configs[i].n; + char *str = xzalloc(n); + + for (int j = 0; j < n - 1; j++) { + int r = random_range(100); + + if (r < configs[i].special_probability) { + str[j] = random_range(' ' - 1) + 1; + } else if (r < (configs[i].special_probability + + configs[i].quote_probablility)) { + str[j] = '"'; + } else { + str[j] = random_range(256 - ' ') + ' '; + } + } + + printf("%-11d %-2d %-2d: ", n, configs[i].quote_probablility, + configs[i].special_probability); + fflush(stdout); + + struct json *json = json_string_create_nocopy(str); + uint64_t start = time_msec(); + + char **res = xzalloc(iter * sizeof *res); + for (int j = 0; j < iter; j++) { + res[j] = json_to_string(json, 0); + } + + printf("%16.3lf ms\n", (double) (time_msec() - start) / iter); + json_destroy(json); + for (int j = 0; j < iter; j++) { + free(res[j]); + } + free(res); + } + + exit(0); +} + +OVSTEST_REGISTER("json-string-benchmark", json_string_benchmark_main); diff --git a/tests/tunnel-push-pop.at b/tests/tunnel-push-pop.at index 48c5de9d19..12fc1ef910 100644 --- a/tests/tunnel-push-pop.at +++ b/tests/tunnel-push-pop.at @@ -595,6 +595,62 @@ OVS_WAIT_UNTIL([test `ovs-pcap p0.pcap | grep 50540000000a5054000000091235 | wc OVS_VSWITCHD_STOP AT_CLEANUP +AT_SETUP([tunnel_push_pop - packet_out debug_slow]) + +OVS_VSWITCHD_START( + [add-port br0 p0 dnl + -- set Interface p0 type=dummy ofport_request=1 dnl + other-config:hwaddr=aa:55:aa:55:00:00]) +AT_CHECK([ovs-appctl vlog/set dpif_netdev:dbg]) +AT_CHECK([ovs-vsctl add-br int-br -- set bridge int-br datapath_type=dummy]) +AT_CHECK([ovs-vsctl add-port int-br t2 dnl + -- set Interface t2 type=geneve options:remote_ip=1.1.2.92 dnl + options:key=123 ofport_request=2]) + +dnl First setup dummy interface IP address, then add the route +dnl so that tnl-port table can get valid IP address for the device. +AT_CHECK([ovs-appctl netdev-dummy/ip4addr br0 1.1.2.88/24], [0], [OK +]) +AT_CHECK([ovs-appctl ovs/route/add 1.1.2.92/24 br0], [0], [OK +]) +AT_CHECK([ovs-ofctl add-flow br0 action=normal]) + +dnl This ARP reply from p0 has two effects: +dnl 1. The ARP cache will learn that 1.1.2.92 is at f8:bc:12:44:34:b6. +dnl 2. The br0 mac learning will learn that f8:bc:12:44:34:b6 is on p0. +AT_CHECK([ + ovs-appctl netdev-dummy/receive p0 dnl + 'recirc_id(0),in_port(2),dnl + eth(src=f8:bc:12:44:34:b6,dst=ff:ff:ff:ff:ff:ff),eth_type(0x0806),dnl + arp(sip=1.1.2.92,tip=1.1.2.88,op=2,sha=f8:bc:12:44:34:b6,tha=00:00:00:00:00:00)' +]) + +AT_CHECK([ovs-vsctl -- set Interface p0 options:tx_pcap=p0.pcap]) + +packet=50540000000a505400000009123 +encap=f8bc124434b6aa55aa5500000800450000320000400040113406010102580101025c83a917c1001e00000000655800007b00 + +dnl Output to tunnel from a int-br internal port. +dnl Checking that the packet arrived and it was correctly encapsulated. +AT_CHECK([ovs-ofctl add-flow int-br "in_port=LOCAL,actions=debug_slow,output:2"]) +AT_CHECK([ovs-appctl netdev-dummy/receive int-br "${packet}4"]) +OVS_WAIT_UNTIL([test `ovs-pcap p0.pcap | grep "${encap}${packet}4" | wc -l` -ge 1]) +dnl Sending again to exercise the non-miss upcall path. +AT_CHECK([ovs-appctl netdev-dummy/receive int-br "${packet}4"]) +OVS_WAIT_UNTIL([test `ovs-pcap p0.pcap | grep "${encap}${packet}4" | wc -l` -ge 2]) + +dnl Output to tunnel from the controller. +AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out int-br CONTROLLER "debug_slow,output:2" "${packet}5"]) +OVS_WAIT_UNTIL([test `ovs-pcap p0.pcap | grep "${encap}${packet}5" | wc -l` -ge 1]) + +dnl Datapath actions should not have tunnel push action. +AT_CHECK([ovs-appctl dpctl/dump-flows | grep -q tnl_push], [1]) +dnl There should be slow_path action instead. +AT_CHECK([ovs-appctl dpctl/dump-flows | grep -q 'slow_path(action)'], [0]) + +OVS_VSWITCHD_STOP +AT_CLEANUP + AT_SETUP([tunnel_push_pop - underlay bridge match]) OVS_VSWITCHD_START([add-port br0 p0 -- set Interface p0 type=dummy ofport_request=1 other-config:hwaddr=aa:55:aa:55:00:00])