Blame otp-0010-Improve-nodes-querying.patch

54bf40
From: Peter Lemenkov <lemenkov@gmail.com>
54bf40
Date: Wed, 24 Oct 2018 14:58:41 +0200
54bf40
Subject: [PATCH] Improve nodes querying
54bf40
54bf40
We once got a few similar stack traces. See the following one for
54bf40
example:
54bf40
54bf40
** Reason for termination ==
54bf40
** {badarg,
54bf40
       [{ets,next,[sys_dist,'rabbitmq-cli-42@host.example.com'],[]},
54bf40
        {net_kernel,get_nodes,2,[{file,"net_kernel.erl"},{line,1025}]},
54bf40
        {net_kernel,get_nodes,2,[{file,"net_kernel.erl"},{line,1019}]},
54bf40
        {net_kernel,get_nodes_info,0,[{file,"net_kernel.erl"},{line,1439}]},
54bf40
        {rabbit_mgmt_external_stats,cluster_links,0,
54bf40
            [{file,"src/rabbit_mgmt_external_stats.erl"},{line,252}]},
54bf40
        {rabbit_mgmt_external_stats,emit_node_node_stats,1,
54bf40
            [{file,"src/rabbit_mgmt_external_stats.erl"},{line,366}]},
54bf40
        {rabbit_mgmt_external_stats,handle_info,2,
54bf40
            [{file,"src/rabbit_mgmt_external_stats.erl"},{line,347}]},
54bf40
        {gen_server,try_dispatch,4,[{file,"gen_server.erl"},{line,615}]}]}
54bf40
54bf40
The problem is that when we're trying to query a list of connected
54bf40
nodes, we're doing it in the following way:
54bf40
54bf40
  Call for the first record in ETS
54bf40
  While not EOF:
54bf40
    Call for the next record in ETS
54bf40
54bf40
What happens when some node disconnects during the "not EOF" loop?
54bf40
We'll get a badarg exception from ets:next/2, as shown in the trace above.
54bf40
54bf40
Let's do it differently - query a list of nodes in one shot, and then
54bf40
get info from each of the nodes in list (w/o extra calls to ets). These
54bf40
individual calls take care of disconnected nodes so everything will be fine
54bf40
even if a node disconnects.
54bf40
54bf40
Signed-off-by: Peter Lemenkov <lemenkov@gmail.com>
54bf40
54bf40
diff --git a/lib/kernel/src/net_kernel.erl b/lib/kernel/src/net_kernel.erl
4c0d16
index 83d3b4b5e1..bebefc2e08 100644
54bf40
--- a/lib/kernel/src/net_kernel.erl
54bf40
+++ b/lib/kernel/src/net_kernel.erl
0cc610
@@ -670,24 +670,16 @@ code_change(_OldVsn, State, _Extra) ->
54bf40
 
54bf40
 terminate(no_network, State) ->
54bf40
     lists:foreach(
54bf40
-      fun({Node, Type}) ->
54bf40
-	      case Type of
54bf40
-		  normal -> ?nodedown(Node, State);
54bf40
-		  _ -> ok
54bf40
-	      end
54bf40
-      end, get_up_nodes() ++ [{node(), normal}]);
54bf40
+      fun(Node) -> ?nodedown(Node, State)
54bf40
+      end, get_nodes_up_normal() ++ [node()]);
54bf40
 terminate(_Reason, State) ->
54bf40
     lists:foreach(
54bf40
       fun(#listen {listen = Listen,module = Mod}) ->
54bf40
 	      Mod:close(Listen)
54bf40
       end, State#state.listen),
54bf40
     lists:foreach(
54bf40
-      fun({Node, Type}) ->
54bf40
-	      case Type of
54bf40
-		  normal -> ?nodedown(Node, State);
54bf40
-		  _ -> ok
54bf40
-	      end
54bf40
-      end, get_up_nodes() ++ [{node(), normal}]).
54bf40
+      fun(Node) -> ?nodedown(Node, State)
54bf40
+      end, get_nodes_up_normal() ++ [node()]).
54bf40
 
54bf40
 
54bf40
 %% ------------------------------------------------------------
4c0d16
@@ -1147,35 +1139,10 @@ disconnect_pid(Pid, State) ->
54bf40
 %%
54bf40
 %%
54bf40
 %%
54bf40
-get_nodes(Which) ->
54bf40
-    get_nodes(ets:first(sys_dist), Which).
54bf40
 
54bf40
-get_nodes('$end_of_table', _) ->
54bf40
-    [];
54bf40
-get_nodes(Key, Which) ->
54bf40
-    case ets:lookup(sys_dist, Key) of
54bf40
-	[Conn = #connection{state = up}] ->
54bf40
-	    [Conn#connection.node | get_nodes(ets:next(sys_dist, Key),
54bf40
-					      Which)];
54bf40
-	[Conn = #connection{}] when Which =:= all ->
54bf40
-	    [Conn#connection.node | get_nodes(ets:next(sys_dist, Key),
54bf40
-					      Which)];
54bf40
-	_ ->
54bf40
-	    get_nodes(ets:next(sys_dist, Key), Which)
54bf40
-    end.
54bf40
-
54bf40
-%% Return a list of all nodes that are 'up'.
54bf40
-get_up_nodes() ->
54bf40
-    get_up_nodes(ets:first(sys_dist)).
54bf40
-
54bf40
-get_up_nodes('$end_of_table') -> [];
54bf40
-get_up_nodes(Key) ->
54bf40
-    case ets:lookup(sys_dist, Key) of
54bf40
- 	[#connection{state=up,node=Node,type=Type}] ->
54bf40
- 	    [{Node,Type}|get_up_nodes(ets:next(sys_dist, Key))];
54bf40
- 	_ ->
54bf40
- 	    get_up_nodes(ets:next(sys_dist, Key))
54bf40
-    end.
54bf40
+%% Return a list of all nodes that are 'up' and not hidden.
54bf40
+get_nodes_up_normal() ->
54bf40
+    ets:select(sys_dist, [{#connection{node = '$1', state = up, type = normal, _ = '_'}, [], ['$1']}]).
54bf40
 
54bf40
 ticker(Kernel, Tick) when is_integer(Tick) ->
54bf40
     process_flag(priority, max),
4c0d16
@@ -1640,15 +1607,14 @@ get_node_info(Node, Key) ->
54bf40
     end.
54bf40
 
54bf40
 get_nodes_info() ->
54bf40
-    get_nodes_info(get_nodes(all), []).
54bf40
-
54bf40
-get_nodes_info([Node|Nodes], InfoList) ->
54bf40
-    case get_node_info(Node) of
54bf40
-	{ok, Info} -> get_nodes_info(Nodes, [{Node, Info}|InfoList]);
54bf40
-	_          -> get_nodes_info(Nodes, InfoList)
54bf40
-    end;
54bf40
-get_nodes_info([], InfoList) ->
54bf40
-    {ok, InfoList}.
54bf40
+    Nodes = ets:select(sys_dist, [{#connection{node = '$1', _ = '_'}, [], ['$1']}]),
54bf40
+    {ok, lists:filtermap(
54bf40
+        fun(Node) ->
54bf40
+            case get_node_info(Node) of
54bf40
+                {ok, Info} -> {true, {Node, Info}};
54bf40
+                _ -> false
54bf40
+             end
54bf40
+        end, Nodes)}.
54bf40
 
54bf40
 %% ------------------------------------------------------------
54bf40
 %% Misc. functions