Skip to content

Commit c82ee67

Browse files
committed
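Add a `router.api_backends.keepalive_connections_max_age` setting (default 1800 seconds) that caps the lifetime of Envoy's connections to API backends, and add a test confirming connections are closed once that age is reached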
1 parent 1cb2bd5 commit c82ee67

File tree

3 files changed

+112
-0
lines changed

3 files changed

+112
-0
lines changed

config/schema.cue

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -414,6 +414,7 @@ import "path"
414414
router: {
415415
api_backends: {
416416
keepalive_idle_timeout: uint | *120
417+
keepalive_connections_max_age: uint | *1800
417418
}
418419
trusted_proxies: [...string] | *[]
419420
global_rate_limits: {

src/api-umbrella/utils/active_config_store/set_envoy_config.lua

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,13 @@ local function build_cluster_resource(cluster_name, options)
123123
-- important to improving performance by keeping pre-established
124124
-- connections around.
125125
idle_timeout = file_config["router"]["api_backends"]["keepalive_idle_timeout"] .. "s",
126+
127+
-- Maximum lifetime for any connection to an API backend, regardless
128+
-- of activity. This ensures connections are periodically
129+
-- re-established so that DNS or routing changes on the backend side
130+
-- are picked up in a timely fashion, rather than holding open
131+
-- connections indefinitely.
132+
max_connection_duration = file_config["router"]["api_backends"]["keepalive_connections_max_age"] .. "s",
126133
},
127134
},
128135
},

test/proxy/keep_alive/test_server_side.rb

Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,110 @@ def test_keeps_idle_connections_open_verify_layers
9494
end
9595
end
9696

97+
# Verifies that Envoy closes its connections to the API backend once the
# configured `keepalive_connections_max_age` has passed, even though the (much
# longer) idle timeout has not been reached, and that a fresh batch of
# requests afterwards succeeds and repopulates the connection pool.
def test_max_connection_age_closes_connections
  envoy_upstream_keepalive_connections_max_age = 5
  envoy_upstream_keepalive_idle_timeout = 120
  override_config({
    :router => {
      :api_backends => {
        # Set a long idle timeout so it doesn't interfere with the max age
        # behavior being tested.
        :keepalive_idle_timeout => envoy_upstream_keepalive_idle_timeout,
        :keepalive_connections_max_age => envoy_upstream_keepalive_connections_max_age,
      },
    },
  }) do
    max_concurrency = 100
    max_concurrency_delta_buffer = (max_concurrency * 0.3).round
    request_count = 300
    hydra = Typhoeus::Hydra.new(max_concurrency: max_concurrency)

    # Queues and runs one batch of concurrent requests through the shared
    # hydra, asserting that every request succeeded.
    run_request_batch = lambda do
      requests = Array.new(request_count) do
        request = Typhoeus::Request.new("http://127.0.0.1:9080/#{unique_test_class_id}/keepalive-default/delay/500", http_options)
        hydra.queue(request)
        request
      end
      hydra.run
      assert_equal(request_count, requests.length)
      requests.each do |req|
        assert_response_code(200, req.response)
      end
    end

    # Asserts that the given stats show roughly `max_concurrency` connections
    # between Envoy and the API backend (i.e. the pool is populated).
    assert_pool_populated = lambda do |pool_stats|
      assert_in_delta(max_concurrency, pool_stats.fetch(:envoy_to_api_backend_active_connections_per_envoy), max_concurrency_delta_buffer)
      assert_in_delta(max_concurrency, pool_stats.fetch(:envoy_to_api_backend_active_connections_per_api_backend), max_concurrency_delta_buffer)
      assert_in_delta(max_concurrency, pool_stats.fetch(:envoy_to_api_backend_idle_connections_per_api_backend), max_concurrency_delta_buffer)
    end

    # Open a bunch of concurrent connections to establish a connection pool.
    run_request_batch.call

    # Immediately after the requests, verify that Envoy has idle connections
    # to the API backend (the connection pool is populated).
    stats = connection_stats
    assert_pool_populated.call(stats)

    # Record the baseline count of locally-destroyed connections.
    baseline_destroy_local = stats.fetch(:envoy_to_api_backend_destroy_local_connections_per_envoy)

    # Wait for the max connection age to expire and verify Envoy closes the
    # connections, even though the idle timeout (120s) hasn't been reached.
    # Also measure the timing to verify it corresponds with the configured
    # max age setting.
    #
    # Use the monotonic clock rather than `Time.now` so that wall-clock
    # adjustments during the wait cannot skew the measured duration.
    #
    # NOTE(review): measurement starts after the first batch has finished,
    # while the connections were presumably opened during that batch, so the
    # observed time is expected to be somewhat shorter than the configured max
    # age. The delta on the timing assertion below has to absorb that; confirm
    # it is wide enough on slow CI hosts.
    begin_time = Process.clock_gettime(Process::CLOCK_MONOTONIC)
    timing = nil
    begin
      stats = nil
      # This should generally happen around the configured max age. The
      # generous timeout only bounds the polling loop so a misbehaving Envoy
      # fails the test instead of hanging it; the actual timing is asserted
      # separately below.
      Timeout.timeout(300) do
        loop do
          stats = connection_stats
          elapsed_time = Process.clock_gettime(Process::CLOCK_MONOTONIC) - begin_time

          # Once Envoy's active connections to the API backend drop to zero,
          # note the elapsed time and stop polling.
          if stats.fetch(:envoy_to_api_backend_active_connections_per_envoy) == 0
            timing = elapsed_time
            break
          end

          sleep 0.1
        end
      end
    rescue Timeout::Error
      flunk("Envoy did not close connections after max connection age expired. Last connection stats: #{stats.inspect}")
    end

    # Verify Envoy closed the connections and the API backend agrees.
    stats = connection_stats
    assert_equal(0, stats.fetch(:envoy_to_api_backend_active_connections_per_envoy))
    assert_operator(stats.fetch(:envoy_to_api_backend_active_connections_per_api_backend), :<=, max_concurrency_delta_buffer)
    assert_operator(stats.fetch(:envoy_to_api_backend_idle_connections_per_api_backend), :<=, max_concurrency_delta_buffer)

    # Verify that Envoy initiated the connection closures (destroy_local
    # should have increased).
    assert_operator(stats.fetch(:envoy_to_api_backend_destroy_local_connections_per_envoy), :>, baseline_destroy_local)

    # Check the timing of when connections were closed. This verifies that
    # the observed behavior corresponds with the configured max age setting.
    assert_in_delta(envoy_upstream_keepalive_connections_max_age, timing, 2)

    # Make another batch of requests after the max age has expired to ensure
    # that new connections work successfully.
    run_request_batch.call

    # Verify new connections were established.
    assert_pool_populated.call(connection_stats)
  end
end
200+
97201
def test_concurrent_backend_connections_can_exceed_keepalive_count
98202
max_values = {
99203
client_to_nginx_router_active_connections_per_nginx_router: 0,

0 commit comments

Comments
 (0)