15 """Run xDS integration tests on GCP using Traffic Director."""
33 import googleapiclient.discovery
35 from oauth2client.client
import GoogleCredentials
39 from src.proto.grpc.health.v1
import health_pb2
40 from src.proto.grpc.health.v1
import health_pb2_grpc
41 from src.proto.grpc.testing
import empty_pb2
42 from src.proto.grpc.testing
import messages_pb2
43 from src.proto.grpc.testing
import test_pb2_grpc
48 from envoy.extensions.filters.common.fault.v3
import fault_pb2
49 from envoy.extensions.filters.http.fault.v3
import fault_pb2
50 from envoy.extensions.filters.http.router.v3
import router_pb2
51 from envoy.extensions.filters.network.http_connection_manager.v3
import \
52 http_connection_manager_pb2
53 from envoy.service.status.v3
import csds_pb2
54 from envoy.service.status.v3
import csds_pb2_grpc
# Configure the root logger: timestamped, level-tagged records to stderr.
logger = logging.getLogger()
console_handler = logging.StreamHandler()
formatter = logging.Formatter(fmt='%(asctime)s: %(levelname)-8s %(message)s')
console_handler.setFormatter(formatter)
logger.addHandler(console_handler)
# Default to WARNING; verbose mode appears to lower this to DEBUG later
# (see the args handling further down) -- confirm against the full file.
logger.setLevel(logging.WARNING)
# Remove gRPC tracing/verbosity overrides from this process's environment so
# they do not affect the helper gRPC channels the driver itself creates.
# The original values are saved; presumably they are forwarded to the test
# client processes later -- not visible in this chunk.
original_grpc_trace = os.environ.pop('GRPC_TRACE', None)
original_grpc_verbosity = os.environ.pop('GRPC_VERBOSITY', None)

# Quiet down the chatty Google API client libraries.
logging.getLogger('google_auth_httplib2').setLevel(logging.WARNING)
logging.getLogger('googleapiclient.discovery').setLevel(logging.WARNING)
77 'change_backend_service',
79 'load_report_based_failover',
81 'remove_instance_group',
83 'secondary_locality_gets_no_requests_on_partial_primary_failure',
84 'secondary_locality_gets_requests_on_primary_failure',
89 'forwarding_rule_port_match',
90 'forwarding_rule_default_port',
98 _ADDITIONAL_TEST_CASES = [
106 _V3_TEST_CASES = frozenset([
'timeout',
'fault_injection',
'csds'])
109 _ALPHA_TEST_CASES = frozenset([
'timeout'])
115 arg_split = arg.split(
',')
117 all_test_cases = _TEST_CASES + _ADDITIONAL_TEST_CASES
118 for arg
in arg_split:
120 test_cases = test_cases.union(_TEST_CASES)
122 test_cases = test_cases.union([arg])
123 if not all([test_case
in all_test_cases
for test_case
in test_cases]):
124 raise Exception(
'Failed to parse test cases %s' % arg)
126 return [x
for x
in all_test_cases
if x
in test_cases]
132 return list(
range(port, port + 1))
134 port_min, port_max = port_arg.split(
':')
135 return list(
range(
int(port_min),
int(port_max) + 1))
# Command-line interface for the test driver.
argp = argparse.ArgumentParser(description='Run xDS interop tests on GCP')
argp.add_argument('--project_id',
                  default='grpc-testing',
                  help='GCP project id')
argp.add_argument('--project_num',
                  default='830293263384',
                  help='GCP project number')
147 help=
'Optional suffix for all generated GCP resource names. Useful to '
148 'ensure distinct names across test runs.')
152 type=parse_test_cases,
153 help=
'Comma-separated list of test cases to run. Available tests: %s, '
154 '(or \'all\' to run every test). '
155 'Alternative tests not included in \'all\': %s' %
156 (
','.join(_TEST_CASES),
','.join(_ADDITIONAL_TEST_CASES)))
160 help=
'File to reference via GRPC_XDS_BOOTSTRAP. Disables built-in '
161 'bootstrap generation')
166 help=
'Support xDS v3 via GRPC_XDS_EXPERIMENTAL_V3_SUPPORT. '
167 'If a pre-created bootstrap file is provided via the --bootstrap_file '
168 'parameter, it should include xds_v3 in its server_features field.')
172 help=
'Command to launch xDS test client. {server_uri}, {stats_port} and '
173 '{qps} references will be replaced using str.format(). GRPC_XDS_BOOTSTRAP '
174 'will be set for the command')
178 help=
'Comma-separated list of hosts running client processes. If set, '
179 '--client_cmd is ignored and client processes are assumed to be running on '
180 'the specified hosts.')
# GCE zones used for the primary and secondary localities, plus client QPS.
argp.add_argument('--zone', default='us-central1-a')
argp.add_argument('--secondary_zone',
                  default='us-west1-b',
                  help='Zone to use for secondary TD locality tests')
argp.add_argument('--qps', default=100, type=int, help='Client QPS')
187 '--wait_for_backend_sec',
190 help=
'Time limit for waiting for created backend services to report '
191 'healthy when launching or updated GCP resources')
193 '--use_existing_gcp_resources',
197 'If set, find and use already created GCP resources instead of creating new'
200 '--keep_gcp_resources',
204 'Leave GCP VMs and configuration running after test. Default behavior is '
205 'to delete when tests complete.')
206 argp.add_argument(
'--halt_after_fail',
208 help=
'Halt and save the resources when test failed.')
210 '--compute_discovery_document',
214 'If provided, uses this file instead of retrieving via the GCP discovery '
217 '--alpha_compute_discovery_document',
220 help=
'If provided, uses this file instead of retrieving via the alpha GCP '
argp.add_argument('--network',
                  default='global/networks/default',
                  help='GCP network to use')
# Default span scanned for a usable backend listening port (min:max form,
# consumed by the --service_port_range option declared just below).
_DEFAULT_PORT_RANGE = '8080:8280'
226 argp.add_argument(
'--service_port_range',
227 default=_DEFAULT_PORT_RANGE,
228 type=parse_port_range,
229 help=
'Listening port for created gRPC backends. Specified as '
230 'either a single int or as a range in the format min:max, in '
231 'which case an available port p will be chosen s.t. min <= p '
237 help=
'Local port for the client process to expose the LB stats service')
238 argp.add_argument(
'--xds_server',
239 default=
'trafficdirector.googleapis.com:443',
241 argp.add_argument(
'--source_image',
242 default=
'projects/debian-cloud/global/images/family/debian-9',
243 help=
'Source image for VMs created during the test')
244 argp.add_argument(
'--path_to_server_binary',
247 help=
'If set, the server binary must already be pre-built on '
248 'the specified source image')
249 argp.add_argument(
'--machine_type',
250 default=
'e2-standard-2',
251 help=
'Machine type for VMs created during the test')
253 '--instance_group_size',
256 help=
'Number of VMs to create per instance group. Certain test cases (e.g., '
257 'round_robin) may not give meaningful results if this is set to a value '
259 argp.add_argument(
'--verbose',
260 help=
'verbose log output',
265 argp.add_argument(
'--log_client_output',
266 help=
'Log captured client output',
271 argp.add_argument(
'--only_stable_gcp_apis',
272 help=
'Do not use alpha compute APIs. Some tests may be '
273 'incompatible with this option (gRPC health checks are '
274 'currently alpha and required for simulating server failure',
277 args = argp.parse_args()
280 logger.setLevel(logging.DEBUG)
283 if args.client_hosts:
284 CLIENT_HOSTS = args.client_hosts.split(
',')
# Time budgets (seconds) for the driver's wait loops.
_WAIT_FOR_URL_MAP_PATCH_SEC = 600
# Stats collection shares the URL-map propagation budget.
_WAIT_FOR_STATS_SEC = _WAIT_FOR_URL_MAP_PATCH_SEC
_DEFAULT_SERVICE_PORT = 80
_WAIT_FOR_BACKEND_SEC = args.wait_for_backend_sec
_WAIT_FOR_OPERATION_SEC = 1200
_INSTANCE_GROUP_SIZE = args.instance_group_size
# 10x the client QPS, i.e. roughly ten seconds' worth of traffic per check.
_NUM_TEST_RPCS = 10 * args.qps
_CONNECTION_TIMEOUT_SEC = 60
305 _BOOTSTRAP_TEMPLATE =
"""
310 "TRAFFICDIRECTOR_NETWORK_NAME": "%s",
311 "com.googleapis.trafficdirector.config_time_trace": "TRUE"
321 "type": "google_default",
325 "server_features": {server_features}
327 }}""" % (args.network.split(
'/')[-1], args.zone, args.xds_server)
332 _TESTS_TO_FAIL_ON_RPC_FAILURE = [
'ping_pong',
'round_robin']
334 _TESTS_TO_RUN_MULTIPLE_RPCS = [
'path_matching',
'header_matching']
336 _TESTS_TO_SEND_METADATA = [
'header_matching']
337 _TEST_METADATA_KEY =
'xds_md'
338 _TEST_METADATA_VALUE_UNARY =
'unary_yranu'
339 _TEST_METADATA_VALUE_EMPTY =
'empty_ytpme'
341 _TEST_METADATA_NUMERIC_KEY =
'xds_md_numeric'
342 _TEST_METADATA_NUMERIC_VALUE =
'159'
343 _PATH_MATCHER_NAME =
'path-matcher'
344 _BASE_TEMPLATE_NAME =
'test-template'
345 _BASE_INSTANCE_GROUP_NAME =
'test-ig'
346 _BASE_HEALTH_CHECK_NAME =
'test-hc'
347 _BASE_FIREWALL_RULE_NAME =
'test-fw-rule'
348 _BASE_BACKEND_SERVICE_NAME =
'test-backend-service'
349 _BASE_URL_MAP_NAME =
'test-map'
350 _BASE_SERVICE_HOST =
'grpc-test'
351 _BASE_TARGET_PROXY_NAME =
'test-target-proxy'
352 _BASE_FORWARDING_RULE_NAME =
'test-forwarding-rule'
353 _TEST_LOG_BASE_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)),
355 _SPONGE_LOG_NAME =
'sponge_log.log'
356 _SPONGE_XML_NAME =
'sponge_log.xml'
363 hosts = [
'localhost']
366 (host, args.stats_port))
as channel:
367 stub = test_pb2_grpc.LoadBalancerStatsServiceStub(channel)
368 request = messages_pb2.LoadBalancerStatsRequest()
369 request.num_rpcs = num_rpcs
370 request.timeout_sec = timeout_sec
371 rpc_timeout = timeout_sec + _CONNECTION_TIMEOUT_SEC
372 logger.debug(
'Invoking GetClientStats RPC to %s:%d:', host,
374 response = stub.GetClientStats(request,
377 logger.debug(
'Invoked GetClientStats RPC to %s: %s', host,
378 json_format.MessageToJson(response))
386 hosts = [
'localhost']
389 (host, args.stats_port))
as channel:
390 stub = test_pb2_grpc.LoadBalancerStatsServiceStub(channel)
391 request = messages_pb2.LoadBalancerAccumulatedStatsRequest()
392 logger.debug(
'Invoking GetClientAccumulatedStats RPC to %s:%d:',
393 host, args.stats_port)
394 response = stub.GetClientAccumulatedStats(
395 request, wait_for_ready=
True, timeout=_CONNECTION_TIMEOUT_SEC)
396 logger.debug(
'Invoked GetClientAccumulatedStats RPC to %s: %s',
405 hosts = [
'localhost']
407 server_address =
'%s:%d' % (host, args.stats_port)
409 stub = csds_pb2_grpc.ClientStatusDiscoveryServiceStub(channel)
410 logger.debug(
'Fetching xDS config dump from %s', server_address)
411 response = stub.FetchClientStatus(csds_pb2.ClientStatusRequest(),
413 timeout=_CONNECTION_TIMEOUT_SEC)
414 logger.debug(
'Fetched xDS config dump from %s', server_address)
415 if len(response.config) != 1:
416 logger.error(
'Unexpected number of ClientConfigs %d: %s',
417 len(response.config), response)
423 return json_format.MessageToDict(
424 response.config[0], preserving_proto_field_name=
True)
431 hosts = [
'localhost']
434 (host, args.stats_port))
as channel:
435 stub = test_pb2_grpc.XdsUpdateClientConfigureServiceStub(channel)
436 request = messages_pb2.ClientConfigureRequest()
437 request.types.extend(rpc_types)
438 for rpc_type, md_key, md_value
in metadata:
439 md = request.metadata.add()
444 request.timeout_sec = timeout_sec
446 'Invoking XdsUpdateClientConfigureService RPC to %s:%d: %s',
447 host, args.stats_port, request)
448 stub.Configure(request,
450 timeout=_CONNECTION_TIMEOUT_SEC)
451 logger.debug(
'Invoked XdsUpdateClientConfigureService RPC to %s',
461 start_time = time.time()
463 logger.debug(
'Waiting for %d sec until backends %s receive load' %
464 (timeout_sec, backends))
465 while time.time() - start_time <= timeout_sec:
468 rpcs_by_peer = stats.rpcs_by_peer
469 for backend
in backends:
470 if backend
not in rpcs_by_peer:
471 error_msg =
'Backend %s did not receive load' % backend
473 if not error_msg
and len(rpcs_by_peer) >
len(backends):
474 error_msg =
'Unexpected backend received load: %s' % rpcs_by_peer
475 if not allow_failures
and stats.num_failures > 0:
476 error_msg =
'%d RPCs failed' % stats.num_failures
479 raise RpcDistributionError(error_msg)
484 num_rpcs=_NUM_TEST_RPCS):
493 num_rpcs=_NUM_TEST_RPCS):
497 allow_failures=
False)
501 start_time = time.time()
502 while time.time() - start_time <= timeout_sec:
505 rpcs_by_peer = stats.rpcs_by_peer
506 for backend
in backends:
507 if backend
in rpcs_by_peer:
508 error_msg =
'Unexpected backend %s receives load' % backend
512 raise Exception(
'Unexpected RPCs going to given backends')
516 '''Block until the test client reaches the state with the given number
517 of RPCs being outstanding stably.
520 rpc_type: A string indicating the RPC method to check for. Either
521 'UnaryCall' or 'EmptyCall'.
522 timeout_sec: Maximum number of seconds to wait until the desired state
524 num_rpcs: Expected number of RPCs to be in-flight.
525 threshold: Number within [0,100], the tolerable percentage by which
526 the actual number of RPCs in-flight can differ from the expected number.
528 if threshold < 0
or threshold > 100:
529 raise ValueError(
'Value error: Threshold should be between 0 to 100')
530 threshold_fraction = threshold / 100.0
531 start_time = time.time()
534 'Waiting for %d sec until %d %s RPCs (with %d%% tolerance) in-flight' %
535 (timeout_sec, num_rpcs, rpc_type, threshold))
536 while time.time() - start_time <= timeout_sec:
540 logger.debug(
'Progress: %s', error_msg)
550 raise Exception(
"Wrong number of %s RPCs in-flight: %s" %
551 (rpc_type, error_msg))
557 rpcs_started = stats.num_rpcs_started_by_method[rpc_type]
558 rpcs_succeeded = stats.num_rpcs_succeeded_by_method[rpc_type]
559 rpcs_failed = stats.num_rpcs_failed_by_method[rpc_type]
560 rpcs_in_flight = rpcs_started - rpcs_succeeded - rpcs_failed
561 if rpcs_in_flight < (num_rpcs * (1 - threshold_fraction)):
562 error_msg = (
'actual(%d) < expected(%d - %d%%)' %
563 (rpcs_in_flight, num_rpcs, threshold))
564 elif rpcs_in_flight > (num_rpcs * (1 + threshold_fraction)):
565 error_msg = (
'actual(%d) > expected(%d + %d%%)' %
566 (rpcs_in_flight, num_rpcs, threshold))
572 """Compare if two distributions are similar.
575 actual_distribution: A list of floats, contains the actual distribution.
576 expected_distribution: A list of floats, contains the expected distribution.
577 threshold: Number within [0,100], the threshold percentage by which the
578 actual distribution can differ from the expected distribution.
581 The similarity between the distributions as a boolean. Returns true if the
582 actual distribution lies within the threshold of the expected
583 distribution, false otherwise.
586 ValueError: if threshold is not with in [0,100].
587 Exception: containing detailed error messages.
589 if len(expected_distribution) !=
len(actual_distribution):
591 'Error: expected and actual distributions have different size (%d vs %d)'
592 % (
len(expected_distribution),
len(actual_distribution)))
593 if threshold < 0
or threshold > 100:
594 raise ValueError(
'Value error: Threshold should be between 0 to 100')
595 threshold_fraction = threshold / 100.0
596 for expected, actual
in zip(expected_distribution, actual_distribution):
597 if actual < (expected * (1 - threshold_fraction)):
598 raise Exception(
"actual(%f) < expected(%f-%d%%)" %
599 (actual, expected, threshold))
600 if actual > (expected * (1 + threshold_fraction)):
601 raise Exception(
"actual(%f) > expected(%f+%d%%)" %
602 (actual, expected, threshold))
607 """Compare if stats have expected instances for each type of RPC.
610 stats: LoadBalancerStatsResponse reported by interop client.
611 expected_instances: a dict with key as the RPC type (string), value as
612 the expected backend instances (list of strings).
615 Returns true if the instances are expected. False if not.
617 for rpc_type, expected_peers
in list(expected_instances.items()):
618 rpcs_by_peer_for_type = stats.rpcs_by_method[rpc_type]
619 rpcs_by_peer = rpcs_by_peer_for_type.rpcs_by_peer
if rpcs_by_peer_for_type
else None
620 logger.debug(
'rpc: %s, by_peer: %s', rpc_type, rpcs_by_peer)
621 peers = list(rpcs_by_peer.keys())
622 if set(peers) !=
set(expected_peers):
623 logger.info(
'unexpected peers for %s, got %s, want %s', rpc_type,
624 peers, expected_peers)
630 logger.info(
'Running test_backends_restart')
632 num_instances =
len(instance_names)
633 start_time = time.time()
639 _WAIT_FOR_BACKEND_SEC)
645 _WAIT_FOR_BACKEND_SEC)
649 alternate_backend_service,
650 same_zone_instance_group):
651 logger.info(
'Running test_change_backend_service')
654 same_zone_instance_group)
656 [same_zone_instance_group])
659 same_zone_instance_group)
666 _WAIT_FOR_URL_MAP_PATCH_SEC)
671 if passed
or not args.halt_after_fail:
678 primary_instance_group,
679 secondary_instance_group,
680 swapped_primary_and_secondary=False):
681 logger.info(
'Running test_gentle_failover')
683 min_instances_for_gentle_failover = 3
686 if num_primary_instances < min_instances_for_gentle_failover:
688 min_instances_for_gentle_failover)
690 gcp, backend_service,
691 [primary_instance_group, secondary_instance_group])
694 secondary_instance_group)
697 secondary_instance_group)
700 instances_to_stop = primary_instance_names[:-1]
701 remaining_instances = primary_instance_names[-1:]
707 remaining_instances + secondary_instance_names,
708 _WAIT_FOR_BACKEND_SEC)
713 except RpcDistributionError
as e:
715 gcp, secondary_instance_group):
719 secondary_instance_group,
720 primary_instance_group,
721 swapped_primary_and_secondary=
True)
729 if passed
or not args.halt_after_fail:
731 [primary_instance_group])
733 num_primary_instances)
736 _WAIT_FOR_BACKEND_SEC)
740 primary_instance_group,
741 secondary_instance_group):
742 logger.info(
'Running test_load_report_based_failover')
746 gcp, backend_service,
747 [primary_instance_group, secondary_instance_group])
750 secondary_instance_group)
753 secondary_instance_group)
758 max_rate =
int(args.qps * 1 / 5)
759 logger.info(
'Patching backend service to RATE with %d max_rate',
763 backend_service, [primary_instance_group, secondary_instance_group],
764 balancing_mode=
'RATE',
767 primary_instance_names + secondary_instance_names,
768 _WAIT_FOR_BACKEND_SEC)
772 max_rate =
int(args.qps * 6 / 5)
773 logger.info(
'Patching backend service to RATE with %d max_rate',
777 backend_service, [primary_instance_group, secondary_instance_group],
778 balancing_mode=
'RATE',
781 _WAIT_FOR_BACKEND_SEC)
782 logger.info(
"success")
787 if passed
or not args.halt_after_fail:
789 [primary_instance_group])
792 _WAIT_FOR_BACKEND_SEC)
796 logger.info(
'Running test_ping_pong')
804 same_zone_instance_group):
805 logger.info(
'Running test_remove_instance_group')
810 [instance_group, same_zone_instance_group],
811 balancing_mode=
'RATE')
814 same_zone_instance_group)
817 same_zone_instance_group)
820 instance_names + same_zone_instance_names,
821 _WAIT_FOR_OPERATION_SEC)
822 remaining_instance_group = same_zone_instance_group
823 remaining_instance_names = same_zone_instance_names
824 except RpcDistributionError
as e:
830 instance_names, _WAIT_FOR_STATS_SEC)
831 remaining_instance_group = same_zone_instance_group
832 remaining_instance_names = same_zone_instance_names
833 except RpcDistributionError
as e:
835 same_zone_instance_names, _WAIT_FOR_STATS_SEC)
836 remaining_instance_group = instance_group
837 remaining_instance_names = instance_names
839 backend_service, [remaining_instance_group],
840 balancing_mode=
'RATE')
842 _WAIT_FOR_BACKEND_SEC)
847 if passed
or not args.halt_after_fail:
850 _WAIT_FOR_BACKEND_SEC)
854 logger.info(
'Running test_round_robin')
867 for i
in range(max_attempts):
869 requests_received = [stats.rpcs_by_peer[x]
for x
in stats.rpcs_by_peer]
870 total_requests_received =
sum(requests_received)
871 if total_requests_received != _NUM_TEST_RPCS:
872 logger.info(
'Unexpected RPC failures, retrying: %s', stats)
874 expected_requests = total_requests_received /
len(instance_names)
875 for instance
in instance_names:
876 if abs(stats.rpcs_by_peer[instance] -
877 expected_requests) > threshold:
879 'RPC peer distribution differs from expected by more than %d '
880 'for instance %s (%s)' % (threshold, instance, stats))
882 raise Exception(
'RPC failures persisted through %d retries' % max_attempts)
888 primary_instance_group,
889 secondary_instance_group,
890 swapped_primary_and_secondary=False):
892 'Running secondary_locality_gets_no_requests_on_partial_primary_failure'
897 gcp, backend_service,
898 [primary_instance_group, secondary_instance_group])
901 secondary_instance_group)
905 instances_to_stop = primary_instance_names[:1]
906 remaining_instances = primary_instance_names[1:]
912 _WAIT_FOR_BACKEND_SEC)
917 except RpcDistributionError
as e:
919 gcp, secondary_instance_group):
924 secondary_instance_group,
925 primary_instance_group,
926 swapped_primary_and_secondary=
True)
931 if passed
or not args.halt_after_fail:
933 [primary_instance_group])
939 primary_instance_group,
940 secondary_instance_group,
941 swapped_primary_and_secondary=False):
942 logger.info(
'Running secondary_locality_gets_requests_on_primary_failure')
946 gcp, backend_service,
947 [primary_instance_group, secondary_instance_group])
950 secondary_instance_group)
953 secondary_instance_group)
961 _WAIT_FOR_BACKEND_SEC)
966 except RpcDistributionError
as e:
968 gcp, secondary_instance_group):
973 secondary_instance_group,
974 primary_instance_group,
975 swapped_primary_and_secondary=
True)
980 if passed
or not args.halt_after_fail:
982 [primary_instance_group])
986 instance_group, alternate_backend_service,
987 same_zone_instance_group):
989 This function prepares the services to be ready for tests that modifies
993 Returns original and alternate backend names as lists of strings.
995 logger.info(
'waiting for original backends to become healthy')
999 [same_zone_instance_group])
1000 logger.info(
'waiting for alternate to become healthy')
1002 same_zone_instance_group)
1005 logger.info(
'original backends instances: %s', original_backend_instances)
1008 same_zone_instance_group)
1009 logger.info(
'alternate backends instances: %s', alternate_backend_instances)
1012 logger.info(
'waiting for traffic to all go to original backends')
1014 _WAIT_FOR_STATS_SEC)
1015 return original_backend_instances, alternate_backend_instances
1019 alternate_backend_service, same_zone_instance_group):
1020 logger.info(
"Running test_metadata_filter")
1024 same_zone_instance_group)
1026 [same_zone_instance_group])
1028 same_zone_instance_group)
1031 with open(bootstrap_path)
as f:
1032 md = json.load(f)[
'node'][
'metadata']
1034 for k, v
in list(md.items()):
1035 match_labels.append({
'name': k,
'value': v})
1037 not_match_labels = [{
'name':
'fake',
'value':
'fail'}]
1038 test_route_rules = [
1046 'metadataFilters': [{
1047 'filterMatchCriteria':
'MATCH_ALL',
1048 'filterLabels': not_match_labels
1051 'service': original_backend_service.url
1058 'metadataFilters': [{
1059 'filterMatchCriteria':
'MATCH_ALL',
1060 'filterLabels': match_labels
1063 'service': alternate_backend_service.url
1074 'metadataFilters': [{
1075 'filterMatchCriteria':
'MATCH_ALL',
1076 'filterLabels': not_match_labels + match_labels
1079 'service': original_backend_service.url
1086 'metadataFilters': [{
1087 'filterMatchCriteria':
'MATCH_ANY',
1088 'filterLabels': not_match_labels + match_labels
1091 'service': alternate_backend_service.url
1101 'metadataFilters': [{
1102 'filterMatchCriteria':
'MATCH_ANY',
1103 'filterLabels': not_match_labels
1106 'service': original_backend_service.url
1113 'metadataFilters': [{
1114 'filterMatchCriteria':
'MATCH_ANY',
1115 'filterLabels': not_match_labels + match_labels
1118 'service': alternate_backend_service.url
1128 'metadataFilters': [{
1129 'filterMatchCriteria':
'MATCH_ANY',
1130 'filterLabels': match_labels
1133 'service': alternate_backend_service.url
1140 'metadataFilters': [{
1141 'filterMatchCriteria':
'MATCH_ALL',
1142 'filterLabels': match_labels
1145 'service': original_backend_service.url
1150 for route_rules
in test_route_rules:
1152 _WAIT_FOR_STATS_SEC)
1154 original_backend_service,
1155 route_rules=route_rules)
1157 _WAIT_FOR_STATS_SEC)
1159 alternate_backend_instances, _WAIT_FOR_STATS_SEC)
1165 if passed
or not args.halt_after_fail:
1170 alternate_backend_service):
1171 logger.info(
"Running api_listener")
1177 _WAIT_FOR_STATS_SEC)
1182 new_config_suffix =
'2'
1183 url_map_2 =
create_url_map(gcp, url_map_name + new_config_suffix,
1184 backend_service, service_host_name)
1186 gcp, target_proxy_name + new_config_suffix,
False, url_map_2)
1187 if not gcp.service_port:
1189 'Faied to find a valid port for the forwarding rule')
1190 potential_ip_addresses = []
1192 for i
in range(max_attempts):
1193 potential_ip_addresses.append(
'10.10.10.%d' %
1194 (random.randint(0, 255)))
1196 forwarding_rule_name + new_config_suffix,
1198 potential_ip_addresses, target_proxy_2)
1199 if gcp.service_port != _DEFAULT_SERVICE_PORT:
1201 url_map_name + new_config_suffix,
1205 _WAIT_FOR_STATS_SEC)
1210 verify_attempts =
int(_WAIT_FOR_URL_MAP_PATCH_SEC / _NUM_TEST_RPCS *
1212 for i
in range(verify_attempts):
1214 _WAIT_FOR_STATS_SEC)
1218 _WAIT_FOR_STATS_SEC)
1224 if passed
or not args.halt_after_fail:
1232 potential_service_ports)
1233 if gcp.service_port != _DEFAULT_SERVICE_PORT:
1237 server_uri = service_host_name +
':' +
str(gcp.service_port)
1239 server_uri = service_host_name
1244 logger.info(
"Running test_forwarding_rule_port_match")
1250 _WAIT_FOR_STATS_SEC)
1254 if x != gcp.service_port
1257 _WAIT_FOR_STATS_SEC)
1262 if passed
or not args.halt_after_fail:
1265 potential_service_ports)
1266 if gcp.service_port != _DEFAULT_SERVICE_PORT:
1270 server_uri = service_host_name +
':' +
str(gcp.service_port)
1272 server_uri = service_host_name
1277 logger.info(
"Running test_forwarding_rule_default_port")
1282 if gcp.service_port == _DEFAULT_SERVICE_PORT:
1284 _WAIT_FOR_STATS_SEC)
1292 _WAIT_FOR_STATS_SEC)
1297 create_url_map(gcp, url_map_name, backend_service, service_host_name)
1299 potential_ip_addresses = []
1301 for i
in range(max_attempts):
1302 potential_ip_addresses.append(
'10.10.10.%d' %
1303 (random.randint(0, 255)))
1305 potential_ip_addresses)
1307 _WAIT_FOR_STATS_SEC)
1313 _WAIT_FOR_STATS_SEC)
1318 if passed
or not args.halt_after_fail:
1326 potential_service_ports)
1327 if gcp.service_port != _DEFAULT_SERVICE_PORT:
1331 server_uri = service_host_name +
':' +
str(gcp.service_port)
1333 server_uri = service_host_name
1338 alternate_backend_service, same_zone_instance_group):
1343 logger.info(
'Running test_traffic_splitting')
1346 gcp, original_backend_service, instance_group,
1347 alternate_backend_service, same_zone_instance_group)
1353 logger.info(
'patching url map with traffic splitting')
1354 original_service_percentage, alternate_service_percentage = 20, 80
1357 services_with_weights={
1358 original_backend_service: original_service_percentage,
1359 alternate_backend_service: alternate_service_percentage,
1362 expected_instance_percentage = [
1363 original_service_percentage * 1.0 /
len(original_backend_instances)
1364 ] *
len(original_backend_instances) + [
1365 alternate_service_percentage * 1.0 /
1366 len(alternate_backend_instances)
1367 ] *
len(alternate_backend_instances)
1371 'waiting for traffic to go to all backends (including alternate)')
1373 original_backend_instances + alternate_backend_instances,
1374 _WAIT_FOR_STATS_SEC)
1380 for i
in range(retry_count):
1382 got_instance_count = [
1383 stats.rpcs_by_peer[i]
for i
in original_backend_instances
1384 ] + [stats.rpcs_by_peer[i]
for i
in alternate_backend_instances]
1385 total_count =
sum(got_instance_count)
1386 got_instance_percentage = [
1387 x * 100.0 / total_count
for x
in got_instance_count
1392 expected_instance_percentage, 5)
1393 except Exception
as e:
1394 logger.info(
'attempt %d', i)
1395 logger.info(
'got percentage: %s', got_instance_percentage)
1396 logger.info(
'expected percentage: %s',
1397 expected_instance_percentage)
1399 if i == retry_count - 1:
1401 'RPC distribution (%s) differs from expected (%s)' %
1402 (got_instance_percentage, expected_instance_percentage))
1404 logger.info(
"success")
1410 if passed
or not args.halt_after_fail:
1416 alternate_backend_service, same_zone_instance_group):
1424 logger.info(
'Running test_path_matching')
1427 gcp, original_backend_service, instance_group,
1428 alternate_backend_service, same_zone_instance_group)
1439 'fullPathMatch':
'/grpc.testing.TestService/EmptyCall'
1441 'service': alternate_backend_service.url
1444 "EmptyCall": alternate_backend_instances,
1445 "UnaryCall": original_backend_instances
1452 'prefixMatch':
'/grpc.testing.TestService/Unary'
1454 'service': alternate_backend_service.url
1457 "UnaryCall": alternate_backend_instances,
1458 "EmptyCall": original_backend_instances
1471 'prefixMatch':
'/grpc.testing.TestService/Unary'
1473 'service': original_backend_service.url
1480 '/grpc.testing.TestService/EmptyCall'
1482 'service': alternate_backend_service.url
1486 "UnaryCall": original_backend_instances,
1487 "EmptyCall": alternate_backend_instances
1497 'service': alternate_backend_service.url
1500 "UnaryCall": alternate_backend_instances,
1501 "EmptyCall": original_backend_instances
1509 'fullPathMatch':
'/gRpC.tEsTinG.tEstseRvice/empTycaLl',
1512 'service': alternate_backend_service.url
1515 "UnaryCall": original_backend_instances,
1516 "EmptyCall": alternate_backend_instances
1520 for (route_rules, expected_instances)
in test_cases:
1521 logger.info(
'patching url map with %s', route_rules)
1523 original_backend_service,
1524 route_rules=route_rules)
1528 'waiting for traffic to go to all backends (including alternate)'
1531 original_backend_instances + alternate_backend_instances,
1532 _WAIT_FOR_STATS_SEC)
1537 for i
in range(retry_count):
1539 if not stats.rpcs_by_method:
1541 'stats.rpcs_by_method is None, the interop client stats service does not support this test case'
1543 logger.info(
'attempt %d', i)
1545 logger.info(
"success")
1547 elif i == retry_count - 1:
1549 'timeout waiting for RPCs to the expected instances: %s'
1550 % expected_instances)
1555 if passed
or not args.halt_after_fail:
1561 alternate_backend_service, same_zone_instance_group):
1569 logger.info(
'Running test_header_matching')
1572 gcp, original_backend_service, instance_group,
1573 alternate_backend_service, same_zone_instance_group)
1588 'headerName': _TEST_METADATA_KEY,
1589 'exactMatch': _TEST_METADATA_VALUE_EMPTY
1592 'service': alternate_backend_service.url
1595 "EmptyCall": alternate_backend_instances,
1596 "UnaryCall": original_backend_instances
1607 'headerName': _TEST_METADATA_KEY,
1608 'prefixMatch': _TEST_METADATA_VALUE_UNARY[:2]
1611 'service': alternate_backend_service.url
1614 "EmptyCall": original_backend_instances,
1615 "UnaryCall": alternate_backend_instances
1626 'headerName': _TEST_METADATA_KEY,
1627 'suffixMatch': _TEST_METADATA_VALUE_EMPTY[-2:]
1630 'service': alternate_backend_service.url
1633 "EmptyCall": alternate_backend_instances,
1634 "UnaryCall": original_backend_instances
1645 'headerName': _TEST_METADATA_NUMERIC_KEY,
1646 'presentMatch':
True
1649 'service': alternate_backend_service.url
1652 "EmptyCall": original_backend_instances,
1653 "UnaryCall": alternate_backend_instances
1665 'headerName': _TEST_METADATA_KEY,
1666 'exactMatch': _TEST_METADATA_VALUE_UNARY,
1670 'service': alternate_backend_service.url
1673 "EmptyCall": alternate_backend_instances,
1674 "UnaryCall": original_backend_instances
1685 'headerName': _TEST_METADATA_NUMERIC_KEY,
1687 'rangeStart':
'100',
1692 'service': alternate_backend_service.url
1695 "EmptyCall": original_backend_instances,
1696 "UnaryCall": alternate_backend_instances
1710 "^%s.*%s$" % (_TEST_METADATA_VALUE_EMPTY[:2],
1711 _TEST_METADATA_VALUE_EMPTY[-2:])
1714 'service': alternate_backend_service.url
1717 "EmptyCall": alternate_backend_instances,
1718 "UnaryCall": original_backend_instances
1722 for (route_rules, expected_instances)
in test_cases:
1723 logger.info(
'patching url map with %s -> alternative',
1724 route_rules[0][
'matchRules'])
1726 original_backend_service,
1727 route_rules=route_rules)
1731 'waiting for traffic to go to all backends (including alternate)'
1734 original_backend_instances + alternate_backend_instances,
1735 _WAIT_FOR_STATS_SEC)
1740 for i
in range(retry_count):
1742 if not stats.rpcs_by_method:
1744 'stats.rpcs_by_method is None, the interop client stats service does not support this test case'
1746 logger.info(
'attempt %d', i)
1748 logger.info(
"success")
1750 elif i == retry_count - 1:
1752 'timeout waiting for RPCs to the expected instances: %s'
1753 % expected_instances)
1758 if passed
or not args.halt_after_fail:
1764 same_zone_instance_group):
1766 Since backend service circuit_breakers configuration cannot be unset,
1767 which causes trouble for restoring validate_for_proxy flag in target
1768 proxy/global forwarding rule. This test uses dedicated backend sevices.
1769 The url_map and backend services undergoes the following state changes:
1772 original_backend_service -> [instance_group]
1773 extra_backend_service -> []
1774 more_extra_backend_service -> []
1776 url_map -> [original_backend_service]
1779 extra_backend_service (with circuit_breakers) -> [instance_group]
1780 more_extra_backend_service (with circuit_breakers) -> [same_zone_instance_group]
1782 url_map -> [extra_backend_service, more_extra_backend_service]
1785 original_backend_service -> [instance_group]
1786 extra_backend_service (with circuit_breakers) -> []
1787 more_extra_backend_service (with circuit_breakers) -> []
1789 url_map -> [original_backend_service]
1791 logger.info(
'Running test_circuit_breaking')
1792 additional_backend_services = []
1800 extra_backend_service_name = _BASE_BACKEND_SERVICE_NAME +
'-extra' + gcp_suffix
1801 more_extra_backend_service_name = _BASE_BACKEND_SERVICE_NAME +
'-more-extra' + gcp_suffix
1803 extra_backend_service_name)
1804 additional_backend_services.append(extra_backend_service)
1806 gcp, more_extra_backend_service_name)
1807 additional_backend_services.append(more_extra_backend_service)
1812 logger.info(
'disabling validate_for_proxyless in target proxy')
1814 extra_backend_service_max_requests = 500
1815 more_extra_backend_service_max_requests = 1000
1817 extra_backend_service, [instance_group],
1820 extra_backend_service_max_requests
1822 logger.info(
'Waiting for extra backends to become healthy')
1825 more_extra_backend_service,
1826 [same_zone_instance_group],
1829 more_extra_backend_service_max_requests
1831 logger.info(
'Waiting for more extra backend to become healthy')
1833 same_zone_instance_group)
1836 gcp, same_zone_instance_group)
1842 'fullPathMatch':
'/grpc.testing.TestService/UnaryCall'
1844 'service': extra_backend_service.url
1850 'fullPathMatch':
'/grpc.testing.TestService/EmptyCall'
1852 'service': more_extra_backend_service.url
1858 messages_pb2.ClientConfigureRequest.RpcType.UNARY_CALL,
1859 messages_pb2.ClientConfigureRequest.RpcType.EMPTY_CALL
1861 logger.info(
'Patching url map with %s', route_rules)
1863 extra_backend_service,
1864 route_rules=route_rules)
1865 logger.info(
'Waiting for traffic to go to all backends')
1867 extra_backend_instances + more_extra_backend_instances,
1868 _WAIT_FOR_STATS_SEC)
1872 messages_pb2.ClientConfigureRequest.RpcType.UNARY_CALL,
1873 messages_pb2.ClientConfigureRequest.RpcType.EMPTY_CALL
1874 ], [(messages_pb2.ClientConfigureRequest.RpcType.UNARY_CALL,
1875 'rpc-behavior',
'keep-open'),
1876 (messages_pb2.ClientConfigureRequest.RpcType.EMPTY_CALL,
1877 'rpc-behavior',
'keep-open')])
1879 'UNARY_CALL', (_WAIT_FOR_BACKEND_SEC +
1880 int(extra_backend_service_max_requests / args.qps)),
1881 extra_backend_service_max_requests, 1)
1882 logger.info(
'UNARY_CALL reached stable state (%d)',
1883 extra_backend_service_max_requests)
1886 (_WAIT_FOR_BACKEND_SEC +
1887 int(more_extra_backend_service_max_requests / args.qps)),
1888 more_extra_backend_service_max_requests, 1)
1889 logger.info(
'EMPTY_CALL reached stable state (%d)',
1890 more_extra_backend_service_max_requests)
1893 extra_backend_service_max_requests = 800
1895 extra_backend_service, [instance_group],
1898 extra_backend_service_max_requests
1901 'UNARY_CALL', (_WAIT_FOR_BACKEND_SEC +
1902 int(extra_backend_service_max_requests / args.qps)),
1903 extra_backend_service_max_requests, 1)
1904 logger.info(
'UNARY_CALL reached stable state after increase (%d)',
1905 extra_backend_service_max_requests)
1906 logger.info(
'success')
1910 [messages_pb2.ClientConfigureRequest.RpcType.UNARY_CALL])
1915 if passed
or not args.halt_after_fail:
1919 for backend_service
in additional_backend_services:
1925 logger.info(
'Running test_timeout')
1927 logger.info(
'waiting for original backends to become healthy')
1934 'fullPathMatch':
'/grpc.testing.TestService/UnaryCall'
1936 'service': original_backend_service.url,
1938 'maxStreamDuration': {
1944 original_backend_service,
1945 route_rules=route_rules)
1949 'timeout_exceeded (UNARY_CALL), timeout_different_route (EMPTY_CALL)',
1954 messages_pb2.ClientConfigureRequest.RpcType.UNARY_CALL,
1955 messages_pb2.ClientConfigureRequest.RpcType.EMPTY_CALL,
1958 (messages_pb2.ClientConfigureRequest.RpcType.UNARY_CALL,
1959 'rpc-behavior',
'sleep-4'),
1960 (messages_pb2.ClientConfigureRequest.RpcType.EMPTY_CALL,
1961 'rpc-behavior',
'sleep-4'),
1970 'app_timeout_exceeded',
1974 messages_pb2.ClientConfigureRequest.RpcType.UNARY_CALL,
1977 (messages_pb2.ClientConfigureRequest.RpcType.UNARY_CALL,
1978 'rpc-behavior',
'sleep-2'),
1987 'timeout_not_exceeded',
1991 messages_pb2.ClientConfigureRequest.RpcType.UNARY_CALL,
2003 for (testcase_name, client_config, expected_results)
in test_cases:
2004 logger.info(
'starting case %s', testcase_name)
2018 if not before_stats.stats_per_method:
2020 'stats.stats_per_method is None, the interop client stats service does not support this test case'
2022 for i
in range(attempt_count):
2023 logger.info(
'%s: attempt %d', testcase_name, i)
2025 test_runtime_secs = 10
2026 time.sleep(test_runtime_secs)
2030 for rpc, status
in list(expected_results.items()):
2031 qty = (after_stats.stats_per_method[rpc].result[status] -
2032 before_stats.stats_per_method[rpc].result[status])
2033 want = test_runtime_secs * args.qps
2035 if qty < (want * .9)
or qty > (want * 1.1):
2036 logger.info(
'%s: failed due to %s[%s]: got %d want ~%d',
2037 testcase_name, rpc, status, qty, want)
2040 logger.info(
'success')
2042 logger.info(
'%s attempt %d failed', testcase_name, i)
2043 before_stats = after_stats
2046 '%s: timeout waiting for expected results: %s; got %s' %
2047 (testcase_name, expected_results,
2048 after_stats.stats_per_method))
2053 if passed
or not args.halt_after_fail:
2058 logger.info(
'Running test_fault_injection')
2060 logger.info(
'waiting for original backends to become healthy')
2063 testcase_header =
'fi_testcase'
2065 def _route(pri, name, fi_policy):
2072 'headerName': testcase_header,
2076 'service': original_backend_service.url,
2078 'faultInjectionPolicy': fi_policy
2101 zero_route.update(_delay(0))
2103 _route(0,
'zero_percent_fault_injection', zero_route),
2104 _route(1,
'always_delay', _delay(100)),
2105 _route(2,
'always_abort',
_abort(100)),
2106 _route(3,
'delay_half', _delay(50)),
2107 _route(4,
'abort_half',
_abort(50)),
2113 'service': original_backend_service.url,
2118 original_backend_service,
2119 route_rules=route_rules)
2159 'zero_percent_fault_injection',
2166 'non_matching_fault_injection',
2177 for (testcase_name, client_config, expected_results)
in test_cases:
2178 logger.info(
'starting case %s', testcase_name)
2180 client_config[
'metadata'] = [
2181 (messages_pb2.ClientConfigureRequest.RpcType.UNARY_CALL,
2182 testcase_header, testcase_name)
2184 client_config[
'rpc_types'] = [
2185 messages_pb2.ClientConfigureRequest.RpcType.UNARY_CALL,
2203 if not before_stats.stats_per_method:
2205 'stats.stats_per_method is None, the interop client stats service does not support this test case'
2207 for i
in range(attempt_count):
2208 logger.info(
'%s: attempt %d', testcase_name, i)
2210 test_runtime_secs = 10
2211 time.sleep(test_runtime_secs)
2215 for status, pct
in list(expected_results.items()):
2217 qty = (after_stats.stats_per_method[rpc].result[status] -
2218 before_stats.stats_per_method[rpc].result[status])
2219 want = pct * args.qps * test_runtime_secs
2221 VARIANCE_ALLOWED = 0.1
2222 if abs(qty - want) > want * VARIANCE_ALLOWED:
2223 logger.info(
'%s: failed due to %s[%s]: got %d want ~%d',
2224 testcase_name, rpc, status, qty, want)
2227 logger.info(
'success')
2229 logger.info(
'%s attempt %d failed', testcase_name, i)
2230 before_stats = after_stats
2233 '%s: timeout waiting for expected results: %s; got %s' %
2234 (testcase_name, expected_results,
2235 after_stats.stats_per_method))
2240 if passed
or not args.halt_after_fail:
def test_csds(gcp, original_backend_service, instance_group, server_uri):
    """Validate the client's CSDS (Client Status Discovery Service) dump.

    Repeatedly fetches the xDS config dump from the test client and checks
    that the LDS/RDS/CDS/EDS resources it reports are consistent with the
    Traffic Director resources created by this test suite: node zone,
    Listener name, presence of VirtualHosts, EDS cluster type, and endpoint
    sub-zone. Succeeds as soon as one complete, valid dump is seen.

    Raises:
        RuntimeError: if no valid xDS config dump is received before the
            5-minute deadline.
    """
    test_csds_timeout_s = datetime.timedelta(minutes=5).total_seconds()
    sleep_interval_between_attempts_s = datetime.timedelta(
        seconds=2).total_seconds()
    logger.info('Running test_csds')

    logger.info('waiting for original backends to become healthy')
    wait_for_healthy_backends(gcp, original_backend_service, instance_group)

    # Retry fetching + validating the dump until the deadline.
    deadline = time.time() + test_csds_timeout_s
    cnt = 0
    while time.time() <= deadline:
        client_config = get_client_xds_config_dump()
        logger.info('test_csds attempt %d: received xDS config %s', cnt,
                    json.dumps(client_config, indent=2))
        if client_config is not None:
            # Got the xDS config dump; now validate it.
            ok = True
            try:
                if client_config['node']['locality']['zone'] != args.zone:
                    logger.info('Invalid zone %s != %s',
                                client_config['node']['locality']['zone'],
                                args.zone)
                    ok = False
                seen = set()
                # Older clients report per-type configs under 'xds_config'.
                for xds_config in client_config.get('xds_config', []):
                    if 'listener_config' in xds_config:
                        listener_name = xds_config['listener_config'][
                            'dynamic_listeners'][0]['active_state']['listener'][
                                'name']
                        if listener_name != server_uri:
                            logger.info('Invalid Listener name %s != %s',
                                        listener_name, server_uri)
                            ok = False
                        else:
                            seen.add('lds')
                    elif 'route_config' in xds_config:
                        num_vh = len(
                            xds_config['route_config']['dynamic_route_configs']
                            [0]['route_config']['virtual_hosts'])
                        if num_vh <= 0:
                            logger.info('Invalid number of VirtualHosts %s',
                                        num_vh)
                            ok = False
                        else:
                            seen.add('rds')
                    elif 'cluster_config' in xds_config:
                        cluster_type = xds_config['cluster_config'][
                            'dynamic_active_clusters'][0]['cluster']['type']
                        if cluster_type != 'EDS':
                            logger.info('Invalid cluster type %s != EDS',
                                        cluster_type)
                            ok = False
                        else:
                            seen.add('cds')
                    elif 'endpoint_config' in xds_config:
                        sub_zone = xds_config["endpoint_config"][
                            "dynamic_endpoint_configs"][0]["endpoint_config"][
                                "endpoints"][0]["locality"]["sub_zone"]
                        if args.zone not in sub_zone:
                            logger.info('Invalid endpoint sub_zone %s',
                                        sub_zone)
                            ok = False
                        else:
                            seen.add('eds')
                # Newer clients report resources under 'generic_xds_configs',
                # keyed by type_url.
                for generic_xds_config in client_config.get(
                        'generic_xds_configs', []):
                    if re.search(r'\.Listener$',
                                 generic_xds_config['type_url']):
                        seen.add('lds')
                        listener = generic_xds_config["xds_config"]
                        if listener['name'] != server_uri:
                            # BUG FIX: previously logged `listener_name`, a
                            # leftover from the loop above that may be unbound
                            # here (NameError masks the real failure).
                            logger.info('Invalid Listener name %s != %s',
                                        listener['name'], server_uri)
                            ok = False
                    elif re.search(r'\.RouteConfiguration$',
                                   generic_xds_config['type_url']):
                        seen.add('rds')
                        route_config = generic_xds_config["xds_config"]
                        if not len(route_config['virtual_hosts']):
                            # BUG FIX: log this route config's count, not the
                            # stale `num_vh` from the previous loop.
                            logger.info('Invalid number of VirtualHosts %s',
                                        len(route_config['virtual_hosts']))
                            ok = False
                    elif re.search(r'\.Cluster$',
                                   generic_xds_config['type_url']):
                        seen.add('cds')
                        cluster = generic_xds_config["xds_config"]
                        if cluster['type'] != 'EDS':
                            # BUG FIX: log this cluster's type, not the stale
                            # `cluster_type` from the previous loop.
                            logger.info('Invalid cluster type %s != EDS',
                                        cluster['type'])
                            ok = False
                    elif re.search(r'\.ClusterLoadAssignment$',
                                   generic_xds_config['type_url']):
                        seen.add('eds')
                        endpoint = generic_xds_config["xds_config"]
                        if args.zone not in endpoint["endpoints"][0][
                                "locality"]["sub_zone"]:
                            # BUG FIX: log this endpoint's sub_zone, not the
                            # stale `sub_zone` from the previous loop.
                            logger.info(
                                'Invalid endpoint sub_zone %s',
                                endpoint["endpoints"][0]["locality"]
                                ["sub_zone"])
                            ok = False
                want = {'lds', 'rds', 'cds', 'eds'}
                if seen != want:
                    logger.info('Incomplete xDS config dump, seen=%s', seen)
                    ok = False
            # BUG FIX: was a bare `except:`, which also swallowed SystemExit
            # and KeyboardInterrupt.
            except Exception:
                logger.exception('Error in xDS config dump:')
                ok = False
            finally:
                if ok:
                    # Successfully fetched a dump and it passed all checks.
                    logger.info('success')
                    return
        logger.info('test_csds attempt %d failed', cnt)
        # Give the client some time to fetch fresh xDS resources.
        time.sleep(sleep_interval_between_attempts_s)
        cnt += 1

    raise RuntimeError('failed to receive a valid xDS config in %s seconds' %
                       test_csds_timeout_s)
2368 if not gcp.alpha_compute:
2370 'Not setting validateForProxy because alpha is not enabled')
2372 if len(gcp.global_forwarding_rules) != 1
or len(
2373 gcp.target_proxies) != 1
or len(gcp.url_maps) != 1:
2375 "Global forwarding rule, target proxy or url map not found.")
def get_serving_status(instance, service_port):
    """Query the xDS test server's health service and return its status.

    Opens a short-lived insecure channel to ``instance:service_port`` and
    issues a standard grpc.health.v1 Check RPC.
    """
    target = '%s:%d' % (instance, service_port)
    with grpc.insecure_channel(target) as channel:
        stub = health_pb2_grpc.HealthStub(channel)
        return stub.Check(health_pb2.HealthCheckRequest())
def set_serving_status(instances, service_port, serving):
    """Flip each backend's health status and wait until it takes effect.

    For every instance, calls SetServing/SetNotServing on the server's
    XdsUpdateHealthService, then polls the health service until the reported
    status matches, retrying a bounded number of times.

    Raises:
        Exception: if an instance's status does not converge after all
            retries.
    """
    logger.info('setting %s serving status to %s', instances, serving)
    retry_count = 5
    for instance in instances:
        target = '%s:%d' % (instance, service_port)
        with grpc.insecure_channel(target) as channel:
            logger.info('setting %s serving status to %s', instance, serving)
            stub = test_pb2_grpc.XdsUpdateHealthServiceStub(channel)
            for attempt in range(retry_count):
                if serving:
                    stub.SetServing(empty_pb2.Empty())
                else:
                    stub.SetNotServing(empty_pb2.Empty())
                serving_status = get_serving_status(instance, service_port)
                logger.info('got instance service status %s', serving_status)
                if serving:
                    want_status = health_pb2.HealthCheckResponse.SERVING
                else:
                    want_status = health_pb2.HealthCheckResponse.NOT_SERVING
                if serving_status.status == want_status:
                    break
                if attempt == retry_count - 1:
                    raise Exception(
                        'failed to set instance service status after %d retries'
                        % retry_count)
2423 peer
in instance_names
for peer
in list(stats.rpcs_by_peer.keys()))
def get_startup_script(path_to_server_binary, service_port):
    """Build the VM startup script that launches the xDS test server.

    If a prebuilt server binary path is given, the script just backgrounds
    it on ``service_port``; otherwise it clones and builds the grpc-java
    interop test server from source before launching it.
    """
    if path_to_server_binary:
        return 'nohup %s --port=%d 1>/dev/null &' % (path_to_server_binary,
                                                     service_port)
    else:
        return """#!/bin/bash
sudo apt update
sudo apt install -y git default-jdk
mkdir java_server
pushd java_server
git clone https://github.com/grpc/grpc-java.git
pushd grpc-java
pushd interop-testing
../gradlew installDist -x test -PskipCodegen=true -PskipAndroid=true

nohup build/install/grpc-interop-testing/bin/xds-test-server \
    --port=%d 1>/dev/null &""" % service_port
2451 'items': [
'allow-health-checks']
2453 'machineType': machine_type,
2454 'serviceAccounts': [{
2456 'scopes': [
'https://www.googleapis.com/auth/cloud-platform',]
2458 'networkInterfaces': [{
2460 'type':
'ONE_TO_ONE_NAT'
2466 'initializeParams': {
2467 'sourceImage': source_image
2473 'key':
'startup-script',
2474 'value': startup_script
2480 logger.debug(
'Sending GCP request with body=%s', config)
2481 result = gcp.compute.instanceTemplates().
insert(
2482 project=gcp.project, body=config).execute(num_retries=_GCP_API_RETRIES)
2484 gcp.instance_template =
GcpResource(config[
'name'], result[
'targetLink'])
2490 'instanceTemplate': gcp.instance_template.url,
2494 'port': gcp.service_port
2498 logger.debug(
'Sending GCP request with body=%s', config)
2499 result = gcp.compute.instanceGroupManagers().
insert(
2500 project=gcp.project, zone=zone,
2501 body=config).execute(num_retries=_GCP_API_RETRIES)
2503 result = gcp.compute.instanceGroupManagers().
get(
2504 project=gcp.project, zone=zone,
2505 instanceGroupManager=config[
'name']).execute(
2506 num_retries=_GCP_API_RETRIES)
2507 instance_group =
InstanceGroup(config[
'name'], result[
'instanceGroup'],
2509 gcp.instance_groups.append(instance_group)
2511 _WAIT_FOR_OPERATION_SEC)
2512 return instance_group
2516 if gcp.alpha_compute:
2520 'grpcHealthCheck': {
2521 'portSpecification':
'USE_SERVING_PORT'
2524 compute_to_use = gcp.alpha_compute
2533 compute_to_use = gcp.compute
2534 logger.debug(
'Sending GCP request with body=%s', config)
2535 result = compute_to_use.healthChecks().
insert(
2536 project=gcp.project, body=config).execute(num_retries=_GCP_API_RETRIES)
2538 gcp.health_check =
GcpResource(config[
'name'], result[
'targetLink'])
2544 'direction':
'INGRESS',
2548 'sourceRanges': [
'35.191.0.0/16',
'130.211.0.0/22'],
2549 'targetTags': [
'allow-health-checks'],
2551 logger.debug(
'Sending GCP request with body=%s', config)
2552 result = gcp.compute.firewalls().
insert(
2553 project=gcp.project, body=config).execute(num_retries=_GCP_API_RETRIES)
2555 gcp.health_check_firewall_rule =
GcpResource(config[
'name'],
2556 result[
'targetLink'])
2560 if gcp.alpha_compute:
2562 compute_to_use = gcp.alpha_compute
2565 compute_to_use = gcp.compute
2568 'loadBalancingScheme':
'INTERNAL_SELF_MANAGED',
2569 'healthChecks': [gcp.health_check.url],
2571 'protocol': protocol
2573 logger.debug(
'Sending GCP request with body=%s', config)
2574 result = compute_to_use.backendServices().
insert(
2575 project=gcp.project, body=config).execute(num_retries=_GCP_API_RETRIES)
2577 backend_service =
GcpResource(config[
'name'], result[
'targetLink'])
2578 gcp.backend_services.append(backend_service)
2579 return backend_service
2585 'defaultService': backend_service.url,
2587 'name': _PATH_MATCHER_NAME,
2588 'defaultService': backend_service.url,
2591 'hosts': [host_name],
2592 'pathMatcher': _PATH_MATCHER_NAME
2595 logger.debug(
'Sending GCP request with body=%s', config)
2596 result = gcp.compute.urlMaps().
insert(
2597 project=gcp.project, body=config).execute(num_retries=_GCP_API_RETRIES)
2599 url_map =
GcpResource(config[
'name'], result[
'targetLink'])
2600 gcp.url_maps.append(url_map)
2607 'hosts': [
'%s:%d' % (host_name, gcp.service_port)],
2608 'pathMatcher': _PATH_MATCHER_NAME
2611 logger.debug(
'Sending GCP request with body=%s', config)
2612 result = gcp.compute.urlMaps().patch(
2613 project=gcp.project, urlMap=name,
2614 body=config).execute(num_retries=_GCP_API_RETRIES)
2620 arg_url_map_url = url_map.url
2622 arg_url_map_url = gcp.url_maps[0].url
2623 if gcp.alpha_compute:
2626 'url_map': arg_url_map_url,
2627 'validate_for_proxyless': validate_for_proxyless
2629 logger.debug(
'Sending GCP request with body=%s', config)
2630 result = gcp.alpha_compute.targetGrpcProxies().
insert(
2631 project=gcp.project,
2632 body=config).execute(num_retries=_GCP_API_RETRIES)
2636 'url_map': arg_url_map_url,
2638 logger.debug(
'Sending GCP request with body=%s', config)
2639 result = gcp.compute.targetHttpProxies().
insert(
2640 project=gcp.project,
2641 body=config).execute(num_retries=_GCP_API_RETRIES)
2643 target_proxy =
GcpResource(config[
'name'], result[
'targetLink'])
2644 gcp.target_proxies.append(target_proxy)
2651 potential_ip_addresses=['0.0.0.0'],
2654 arg_target_proxy_url = target_proxy.url
2656 arg_target_proxy_url = gcp.target_proxies[0].url
2657 if gcp.alpha_compute:
2658 compute_to_use = gcp.alpha_compute
2660 compute_to_use = gcp.compute
2661 for port
in potential_ports:
2662 for ip_address
in potential_ip_addresses:
2666 'loadBalancingScheme':
'INTERNAL_SELF_MANAGED',
2667 'portRange':
str(port),
2668 'IPAddress': ip_address,
2669 'network': args.network,
2670 'target': arg_target_proxy_url,
2672 logger.debug(
'Sending GCP request with body=%s', config)
2673 result = compute_to_use.globalForwardingRules().
insert(
2674 project=gcp.project,
2675 body=config).execute(num_retries=_GCP_API_RETRIES)
2677 global_forwarding_rule =
GcpResource(config[
'name'],
2678 result[
'targetLink'])
2679 gcp.global_forwarding_rules.append(global_forwarding_rule)
2680 gcp.service_port = port
2682 except googleapiclient.errors.HttpError
as http_error:
2684 'Got error %s when attempting to create forwarding rule to '
2685 '%s:%d. Retrying with another port.' %
2686 (http_error, ip_address, port))
2691 result = gcp.compute.healthChecks().
get(
2692 project=gcp.project, healthCheck=health_check_name).execute()
2693 gcp.health_check =
GcpResource(health_check_name, result[
'selfLink'])
2694 except Exception
as e:
2695 gcp.errors.append(e)
2696 gcp.health_check =
GcpResource(health_check_name,
None)
2701 result = gcp.compute.firewalls().
get(project=gcp.project,
2702 firewall=firewall_name).execute()
2703 gcp.health_check_firewall_rule =
GcpResource(firewall_name,
2705 except Exception
as e:
2706 gcp.errors.append(e)
2707 gcp.health_check_firewall_rule =
GcpResource(firewall_name,
None)
2712 result = gcp.compute.backendServices().
get(
2713 project=gcp.project, backendService=backend_service_name).execute()
2714 backend_service =
GcpResource(backend_service_name, result[
'selfLink'])
2715 except Exception
as e:
2717 gcp.errors.append(e)
2718 backend_service =
GcpResource(backend_service_name,
None)
2719 gcp.backend_services.append(backend_service)
2720 return backend_service
2725 result = gcp.compute.urlMaps().
get(project=gcp.project,
2726 urlMap=url_map_name).execute()
2727 url_map =
GcpResource(url_map_name, result[
'selfLink'])
2728 gcp.url_maps.append(url_map)
2729 except Exception
as e:
2731 gcp.errors.append(e)
2736 if gcp.alpha_compute:
2737 result = gcp.alpha_compute.targetGrpcProxies().
get(
2738 project=gcp.project,
2739 targetGrpcProxy=target_proxy_name).execute()
2741 result = gcp.compute.targetHttpProxies().
get(
2742 project=gcp.project,
2743 targetHttpProxy=target_proxy_name).execute()
2744 target_proxy =
GcpResource(target_proxy_name, result[
'selfLink'])
2745 gcp.target_proxies.append(target_proxy)
2746 except Exception
as e:
2748 gcp.errors.append(e)
2753 result = gcp.compute.globalForwardingRules().
get(
2754 project=gcp.project, forwardingRule=forwarding_rule_name).execute()
2755 global_forwarding_rule =
GcpResource(forwarding_rule_name,
2757 gcp.global_forwarding_rules.append(global_forwarding_rule)
2758 except Exception
as e:
2760 gcp.errors.append(e)
2765 result = gcp.compute.instanceTemplates().
get(
2766 project=gcp.project, instanceTemplate=template_name).execute()
2767 gcp.instance_template =
GcpResource(template_name, result[
'selfLink'])
2768 except Exception
as e:
2769 gcp.errors.append(e)
2770 gcp.instance_template =
GcpResource(template_name,
None)
2775 result = gcp.compute.instanceGroups().
get(
2776 project=gcp.project, zone=zone,
2777 instanceGroup=instance_group_name).execute()
2778 gcp.service_port = result[
'namedPorts'][0][
'port']
2779 instance_group =
InstanceGroup(instance_group_name, result[
'selfLink'],
2781 except Exception
as e:
2782 gcp.errors.append(e)
2783 instance_group =
InstanceGroup(instance_group_name,
None, zone)
2784 gcp.instance_groups.append(instance_group)
2785 return instance_group
2789 if not forwarding_rule_to_delete:
2792 logger.debug(
'Deleting forwarding rule %s',
2793 forwarding_rule_to_delete.name)
2794 result = gcp.compute.globalForwardingRules().
delete(
2795 project=gcp.project,
2796 forwardingRule=forwarding_rule_to_delete.name).execute(
2797 num_retries=_GCP_API_RETRIES)
2799 if forwarding_rule_to_delete
in gcp.global_forwarding_rules:
2800 gcp.global_forwarding_rules.remove(forwarding_rule_to_delete)
2803 'Forwarding rule %s does not exist in gcp.global_forwarding_rules',
2804 forwarding_rule_to_delete.name)
2805 except googleapiclient.errors.HttpError
as http_error:
2806 logger.info(
'Delete failed: %s', http_error)
2810 forwarding_rules_to_delete = gcp.global_forwarding_rules.copy()
2811 for forwarding_rule
in forwarding_rules_to_delete:
2816 if not proxy_to_delete:
2819 if gcp.alpha_compute:
2820 logger.debug(
'Deleting grpc proxy %s', proxy_to_delete.name)
2821 result = gcp.alpha_compute.targetGrpcProxies().
delete(
2822 project=gcp.project,
2823 targetGrpcProxy=proxy_to_delete.name).execute(
2824 num_retries=_GCP_API_RETRIES)
2826 logger.debug(
'Deleting http proxy %s', proxy_to_delete.name)
2827 result = gcp.compute.targetHttpProxies().
delete(
2828 project=gcp.project,
2829 targetHttpProxy=proxy_to_delete.name).execute(
2830 num_retries=_GCP_API_RETRIES)
2832 if proxy_to_delete
in gcp.target_proxies:
2833 gcp.target_proxies.remove(proxy_to_delete)
2835 logger.debug(
'Gcp proxy %s does not exist in gcp.target_proxies',
2836 proxy_to_delete.name)
2837 except googleapiclient.errors.HttpError
as http_error:
2838 logger.info(
'Delete failed: %s', http_error)
2842 target_proxies_to_delete = gcp.target_proxies.copy()
2843 for target_proxy
in target_proxies_to_delete:
2848 if not url_map_to_delete:
2851 logger.debug(
'Deleting url map %s', url_map_to_delete.name)
2852 result = gcp.compute.urlMaps().
delete(
2853 project=gcp.project,
2854 urlMap=url_map_to_delete.name).execute(num_retries=_GCP_API_RETRIES)
2856 if url_map_to_delete
in gcp.url_maps:
2857 gcp.url_maps.remove(url_map_to_delete)
2859 logger.debug(
'Url map %s does not exist in gcp.url_maps',
2860 url_map_to_delete.name)
2861 except googleapiclient.errors.HttpError
as http_error:
2862 logger.info(
'Delete failed: %s', http_error)
2866 url_maps_to_delete = gcp.url_maps.copy()
2867 for url_map
in url_maps_to_delete:
2873 logger.debug(
'Deleting backend service %s', backend_service.name)
2874 result = gcp.compute.backendServices().
delete(
2875 project=gcp.project, backendService=backend_service.name).execute(
2876 num_retries=_GCP_API_RETRIES)
2878 except googleapiclient.errors.HttpError
as http_error:
2879 logger.info(
'Delete failed: %s', http_error)
2883 for backend_service
in gcp.backend_services:
2889 logger.debug(
'Deleting firewall %s',
2890 gcp.health_check_firewall_rule.name)
2891 result = gcp.compute.firewalls().
delete(
2892 project=gcp.project,
2893 firewall=gcp.health_check_firewall_rule.name).execute(
2894 num_retries=_GCP_API_RETRIES)
2896 except googleapiclient.errors.HttpError
as http_error:
2897 logger.info(
'Delete failed: %s', http_error)
2902 logger.debug(
'Deleting health check %s', gcp.health_check.name)
2903 result = gcp.compute.healthChecks().
delete(
2904 project=gcp.project, healthCheck=gcp.health_check.name).execute(
2905 num_retries=_GCP_API_RETRIES)
2907 except googleapiclient.errors.HttpError
as http_error:
2908 logger.info(
'Delete failed: %s', http_error)
2912 for instance_group
in gcp.instance_groups:
2914 logger.debug(
'Deleting instance group %s %s', instance_group.name,
2915 instance_group.zone)
2916 result = gcp.compute.instanceGroupManagers().
delete(
2917 project=gcp.project,
2918 zone=instance_group.zone,
2919 instanceGroupManager=instance_group.name).execute(
2920 num_retries=_GCP_API_RETRIES)
2922 instance_group.zone,
2924 timeout_sec=_WAIT_FOR_BACKEND_SEC)
2925 except googleapiclient.errors.HttpError
as http_error:
2926 logger.info(
'Delete failed: %s', http_error)
2931 logger.debug(
'Deleting instance template %s',
2932 gcp.instance_template.name)
2933 result = gcp.compute.instanceTemplates().
delete(
2934 project=gcp.project,
2935 instanceTemplate=gcp.instance_template.name).execute(
2936 num_retries=_GCP_API_RETRIES)
2938 except googleapiclient.errors.HttpError
as http_error:
2939 logger.info(
'Delete failed: %s', http_error)
2945 balancing_mode='UTILIZATION',
2947 circuit_breakers=None):
2948 if gcp.alpha_compute:
2949 compute_to_use = gcp.alpha_compute
2951 compute_to_use = gcp.compute
2954 'group': instance_group.url,
2955 'balancingMode': balancing_mode,
2956 'maxRate': max_rate
if balancing_mode ==
'RATE' else None
2957 }
for instance_group
in instance_groups],
2958 'circuitBreakers': circuit_breakers,
2960 logger.debug(
'Sending GCP request with body=%s', config)
2961 result = compute_to_use.backendServices().patch(
2962 project=gcp.project, backendService=backend_service.name,
2963 body=config).execute(num_retries=_GCP_API_RETRIES)
2966 timeout_sec=_WAIT_FOR_BACKEND_SEC)
2972 timeout_sec=_WAIT_FOR_OPERATION_SEC):
2973 result = gcp.compute.instanceGroupManagers().resize(
2974 project=gcp.project,
2975 zone=instance_group.zone,
2976 instanceGroupManager=instance_group.name,
2977 size=new_size).execute(num_retries=_GCP_API_RETRIES)
2979 instance_group.zone,
2983 new_size, timeout_sec)
2987 backend_service=None,
2988 services_with_weights=None,
2992 url_map_name = url_map.name
2994 url_map_name = gcp.url_maps[0].name
2995 '''change url_map's backend service
2997 Only one of backend_service and service_with_weights can be not None.
2999 if gcp.alpha_compute:
3000 compute_to_use = gcp.alpha_compute
3002 compute_to_use = gcp.compute
3004 if backend_service
and services_with_weights:
3006 'both backend_service and service_with_weights are not None.')
3008 default_service = backend_service.url
if backend_service
else None
3009 default_route_action = {
3010 'weightedBackendServices': [{
3011 'backendService': service.url,
3013 }
for service, w
in list(services_with_weights.items())]
3014 }
if services_with_weights
else None
3018 'name': _PATH_MATCHER_NAME,
3019 'defaultService': default_service,
3020 'defaultRouteAction': default_route_action,
3021 'routeRules': route_rules,
3024 logger.debug(
'Sending GCP request with body=%s', config)
3025 result = compute_to_use.urlMaps().patch(
3026 project=gcp.project, urlMap=url_map_name,
3027 body=config).execute(num_retries=_GCP_API_RETRIES)
3032 expected_size, timeout_sec):
3033 start_time = time.time()
3036 if current_size == expected_size:
3038 if time.time() - start_time > timeout_sec:
3040 'Instance group had expected size %d but actual size %d' %
3041 (expected_size, current_size))
def wait_for_global_operation(gcp,
                              operation,
                              timeout_sec=_WAIT_FOR_OPERATION_SEC):
    """Poll a GCE global operation until it reports DONE.

    Raises:
        Exception: if the operation finishes with an 'error' payload, or
            does not complete within ``timeout_sec`` seconds.
    """
    start_time = time.time()
    while time.time() - start_time <= timeout_sec:
        result = gcp.compute.globalOperations().get(
            project=gcp.project,
            operation=operation).execute(num_retries=_GCP_API_RETRIES)
        if result['status'] == 'DONE':
            # DONE with an 'error' field means the operation failed.
            if 'error' in result:
                raise Exception(result['error'])
            return
        time.sleep(1)
    raise Exception('Operation %s did not complete within %d' %
                    (operation, timeout_sec))
def wait_for_zone_operation(gcp,
                            zone,
                            operation,
                            timeout_sec=_WAIT_FOR_OPERATION_SEC):
    """Poll a GCE zonal operation until it reports DONE.

    Raises:
        Exception: if the operation finishes with an 'error' payload, or
            does not complete within ``timeout_sec`` seconds.
    """
    start_time = time.time()
    while time.time() - start_time <= timeout_sec:
        result = gcp.compute.zoneOperations().get(
            project=gcp.project, zone=zone,
            operation=operation).execute(num_retries=_GCP_API_RETRIES)
        if result['status'] == 'DONE':
            # DONE with an 'error' field means the operation failed.
            if 'error' in result:
                raise Exception(result['error'])
            return
        time.sleep(1)
    raise Exception('Operation %s did not complete within %d' %
                    (operation, timeout_sec))
3083 timeout_sec=_WAIT_FOR_BACKEND_SEC):
3084 start_time = time.time()
3085 config = {
'group': instance_group.url}
3087 expected_size =
len(instance_names)
3088 while time.time() - start_time <= timeout_sec:
3089 for instance_name
in instance_names:
3092 logger.info(
'serving status response from %s: %s',
3093 instance_name, status)
3095 logger.info(
'checking serving status of %s failed: %s',
3096 instance_name, rpc_error)
3097 result = gcp.compute.backendServices().getHealth(
3098 project=gcp.project,
3099 backendService=backend_service.name,
3100 body=config).execute(num_retries=_GCP_API_RETRIES)
3101 if 'healthStatus' in result:
3102 logger.info(
'received GCP healthStatus: %s', result[
'healthStatus'])
3104 for instance
in result[
'healthStatus']:
3105 if instance[
'healthState'] !=
'HEALTHY':
3108 if healthy
and expected_size ==
len(result[
'healthStatus']):
3111 logger.info(
'no healthStatus received from GCP')
3113 raise Exception(
'Not all backends became healthy within %d seconds: %s' %
3114 (timeout_sec, result))
def get_instance_names(gcp, instance_group):
    """Return the short names of all instances in a managed instance group.

    Lists instances in every state ('ALL'); returns [] when the group is
    empty or the API response has no 'items'.
    """
    result = gcp.compute.instanceGroups().listInstances(
        project=gcp.project,
        zone=instance_group.zone,
        instanceGroup=instance_group.name,
        body={
            'instanceState': 'ALL'
        }).execute(num_retries=_GCP_API_RETRIES)
    if 'items' not in result:
        return []
    # listInstances() returns each instance's full URL; only the trailing
    # path component (the instance name) is useful to callers.
    instance_names = [
        item['instance'].split('/')[-1] for item in result['items']
    ]
    logger.info('retrieved instance names: %s', instance_names)
    return instance_names
3144 if gcp.health_check_firewall_rule:
3146 if gcp.health_check:
3149 if gcp.instance_template:
3170 def __init__(self, compute, alpha_compute, project, project_num):
3188 "script start time: %s",
3189 datetime.datetime.now(
3190 datetime.timezone.utc).astimezone().strftime(
"%Y-%m-%dT%H:%M:%S %Z"))
3191 logging.debug(
"logging local timezone: %s",
3192 datetime.datetime.now(datetime.timezone.utc).astimezone().tzinfo)
3193 alpha_compute =
None
3194 if args.compute_discovery_document:
3195 with open(args.compute_discovery_document,
'r')
as discovery_doc:
3196 compute = googleapiclient.discovery.build_from_document(
3197 discovery_doc.read())
3198 if not args.only_stable_gcp_apis
and args.alpha_compute_discovery_document:
3199 with open(args.alpha_compute_discovery_document,
'r')
as discovery_doc:
3200 alpha_compute = googleapiclient.discovery.build_from_document(
3201 discovery_doc.read())
3203 compute = googleapiclient.discovery.build(
'compute',
'v1')
3204 if not args.only_stable_gcp_apis:
3205 alpha_compute = googleapiclient.discovery.build(
'compute',
'alpha')
# Aggregate the API clients and project identity into a single state
# object used by the resource-management helpers below.
3210 gcp =
GcpState(compute, alpha_compute, args.project_id, args.project_num)
3211 gcp_suffix = args.gcp_suffix
3212 health_check_name = _BASE_HEALTH_CHECK_NAME + gcp_suffix
3213 if not args.use_existing_gcp_resources:
3214 if args.keep_gcp_resources:
# Retry creating the health check under a fresh randomized suffix when
# the name collides (HttpError); gives up after num_attempts tries.
# NOTE(review): the try body and several surrounding lines (original
# 3215-3225) are missing from this extraction.
3221 for i
in range(num_attempts):
3223 logger.info(
'Using GCP suffix %s', gcp_suffix)
3226 except googleapiclient.errors.HttpError
as http_error:
# On collision, append a random 4-digit suffix and retry with a new
# health-check name.
3227 gcp_suffix =
'%s-%04d' % (gcp_suffix, random.randint(0, 9999))
3228 health_check_name = _BASE_HEALTH_CHECK_NAME + gcp_suffix
3229 logger.exception(
'HttpError when creating health check')
3230 if gcp.health_check
is None:
3231 raise Exception(
'Failed to create health check name after %d '
3232 'attempts' % num_attempts)
# Derive the names of every GCP resource this run will use from the
# (possibly randomized) suffix, so resources from one run are grouped
# and distinguishable from other runs'.
3233 firewall_name = _BASE_FIREWALL_RULE_NAME + gcp_suffix
3234 backend_service_name = _BASE_BACKEND_SERVICE_NAME + gcp_suffix
3235 alternate_backend_service_name = _BASE_BACKEND_SERVICE_NAME +
'-alternate' + gcp_suffix
3236 extra_backend_service_name = _BASE_BACKEND_SERVICE_NAME +
'-extra' + gcp_suffix
3237 more_extra_backend_service_name = _BASE_BACKEND_SERVICE_NAME +
'-more-extra' + gcp_suffix
3238 url_map_name = _BASE_URL_MAP_NAME + gcp_suffix
3239 url_map_name_2 = url_map_name +
'2'
3240 service_host_name = _BASE_SERVICE_HOST + gcp_suffix
3241 target_proxy_name = _BASE_TARGET_PROXY_NAME + gcp_suffix
3242 target_proxy_name_2 = target_proxy_name +
'2'
3243 forwarding_rule_name = _BASE_FORWARDING_RULE_NAME + gcp_suffix
3244 forwarding_rule_name_2 = forwarding_rule_name +
'2'
3245 template_name = _BASE_TEMPLATE_NAME + gcp_suffix
3246 instance_group_name = _BASE_INSTANCE_GROUP_NAME + gcp_suffix
3247 same_zone_instance_group_name = _BASE_INSTANCE_GROUP_NAME +
'-same-zone' + gcp_suffix
3248 secondary_zone_instance_group_name = _BASE_INSTANCE_GROUP_NAME +
'-secondary-zone' + gcp_suffix
# Shuffle the candidate service ports so concurrent runs are less
# likely to race for the same port.
3249 potential_service_ports = list(args.service_port_range)
3250 random.shuffle(potential_service_ports)
# Either look up pre-existing GCP resources (--use_existing_gcp_resources)
# or create them from scratch. NOTE(review): most of the helper calls in
# this region (original lines 3253-3307) are missing from this extraction —
# only trailing argument lines are visible; the callee names cannot be
# confirmed from here.
3251 if args.use_existing_gcp_resources:
3252 logger.info(
'Reusing existing GCP resources')
3257 gcp, alternate_backend_service_name)
3259 extra_backend_service_name,
# record_error=False: this lookup is best-effort and must not mark the
# run as failed.
3262 gcp, more_extra_backend_service_name, record_error=
False)
3269 forwarding_rule_name_2,
3274 gcp, args.zone, same_zone_instance_group_name)
3276 gcp, args.secondary_zone, secondary_zone_instance_group_name)
# Any errors accumulated while resolving existing resources abort the run.
3278 raise Exception(gcp.errors)
3283 gcp, alternate_backend_service_name)
3284 create_url_map(gcp, url_map_name, backend_service, service_host_name)
3287 potential_service_ports)
3288 if not gcp.service_port:
3290 'Failed to find a valid ip:port for the forwarding rule')
3291 if gcp.service_port != _DEFAULT_SERVICE_PORT:
3298 args.source_image, args.machine_type,
3301 _INSTANCE_GROUP_SIZE)
3304 gcp, args.zone, same_zone_instance_group_name, _INSTANCE_GROUP_SIZE)
3306 gcp, args.secondary_zone, secondary_zone_instance_group_name,
3307 _INSTANCE_GROUP_SIZE)
# Build the environment for the test client subprocess. GRPC_TRACE and
# GRPC_VERBOSITY were popped from this script's own environment at import
# time (see original_grpc_trace/original_grpc_verbosity near the top of
# the file) so they would not affect the driver; restore them here so
# they apply to the client under test instead.
3312 client_env = dict(os.environ)
3313 if original_grpc_trace:
3314 client_env[
'GRPC_TRACE'] = original_grpc_trace
3315 if original_grpc_verbosity:
3316 client_env[
'GRPC_VERBOSITY'] = original_grpc_verbosity
3317 bootstrap_server_features = []
# Omit the port from the server URI when it is the default service port.
3319 if gcp.service_port == _DEFAULT_SERVICE_PORT:
3320 server_uri = service_host_name
3322 server_uri = service_host_name +
':' +
str(gcp.service_port)
# Opt the client into the xDS v3 transport protocol when requested, and
# record the feature in the bootstrap file's server_features list.
3323 if args.xds_v3_support:
3324 client_env[
'GRPC_XDS_EXPERIMENTAL_V3_SUPPORT'] =
'true'
3325 bootstrap_server_features.append(
'xds_v3')
# Use a caller-supplied bootstrap file if given; otherwise render one
# from _BOOTSTRAP_TEMPLATE into a temp file (delete=False so it survives
# for the client subprocess to read).
3326 if args.bootstrap_file:
3327 bootstrap_path = os.path.abspath(args.bootstrap_file)
3329 with tempfile.NamedTemporaryFile(delete=
False)
as bootstrap_file:
3330 bootstrap_file.write(
3331 _BOOTSTRAP_TEMPLATE.format(
3332 node_id=
'projects/%s/networks/%s/nodes/%s' %
3333 (gcp.project_num, args.network.split(
'/')[-1],
3335 server_features=json.dumps(
3336 bootstrap_server_features)).
encode(
'utf-8'))
3337 bootstrap_path = bootstrap_file.name
# Point the client at the bootstrap file and enable the experimental
# xDS features exercised by the test cases below.
3338 client_env[
'GRPC_XDS_BOOTSTRAP'] = bootstrap_path
3339 client_env[
'GRPC_XDS_EXPERIMENTAL_CIRCUIT_BREAKING'] =
'true'
3340 client_env[
'GRPC_XDS_EXPERIMENTAL_ENABLE_TIMEOUT'] =
'true'
3341 client_env[
'GRPC_XDS_EXPERIMENTAL_FAULT_INJECTION'] =
'true'
# Main test loop: for each requested test case, skip it if its required
# API support is missing, start the test client subprocess, dispatch to
# the matching test_* function, and record a jobset.JobResult.
3342 for test_case
in args.test_case:
# Tests in _V3_TEST_CASES ('timeout', 'fault_injection', 'csds' per the
# file header) need xDS v3 support; _ALPHA_TEST_CASES need the alpha
# Compute client built earlier.
3343 if test_case
in _V3_TEST_CASES
and not args.xds_v3_support:
3344 logger.info(
'skipping test %s due to missing v3 support',
3347 if test_case
in _ALPHA_TEST_CASES
and not gcp.alpha_compute:
3348 logger.info(
'skipping test %s due to missing alpha support',
# These tests change the forwarding-rule/server-uri configuration and so
# cannot run against long-lived clients on existing client hosts.
# NOTE(review): the enclosing condition (original lines 3349-3355) is
# missing from this extraction.
3352 'api_listener',
'forwarding_rule_port_match',
3353 'forwarding_rule_default_port'
3356 'skipping test %s because test configuration is'
3357 'not compatible with client processes on existing'
3358 'client hosts', test_case)
# forwarding_rule_default_port targets the host without an explicit port.
3360 if test_case ==
'forwarding_rule_default_port':
3361 server_uri = service_host_name
3362 result = jobset.JobResult()
# Each test case gets its own log directory and sponge log file, which
# also captures the client subprocess's stdout/stderr.
3363 log_dir = os.path.join(_TEST_LOG_BASE_DIR, test_case)
3364 if not os.path.exists(log_dir):
3365 os.makedirs(log_dir)
3366 test_log_filename = os.path.join(log_dir, _SPONGE_LOG_NAME)
3367 test_log_file =
open(test_log_filename,
'w+')
3368 client_process =
None
# Select the RPC types and metadata flags for the client command line
# based on what the test case needs.
3370 if test_case
in _TESTS_TO_RUN_MULTIPLE_RPCS:
3371 rpcs_to_send =
'--rpc="UnaryCall,EmptyCall"'
3373 rpcs_to_send =
'--rpc="UnaryCall"'
3375 if test_case
in _TESTS_TO_SEND_METADATA:
3376 metadata_to_send =
'--metadata="EmptyCall:{keyE}:{valueE},UnaryCall:{keyU}:{valueU},UnaryCall:{keyNU}:{valueNU}"'.
format(
3377 keyE=_TEST_METADATA_KEY,
3378 valueE=_TEST_METADATA_VALUE_EMPTY,
3379 keyU=_TEST_METADATA_KEY,
3380 valueU=_TEST_METADATA_VALUE_UNARY,
3381 keyNU=_TEST_METADATA_NUMERIC_KEY,
3382 valueNU=_TEST_METADATA_NUMERIC_VALUE)
3389 metadata_to_send =
''
3404 fail_on_failed_rpc =
''
# Only spawn a local client process when no external client hosts are
# configured; otherwise the clients already run elsewhere.
3407 if not CLIENT_HOSTS:
3408 client_cmd_formatted = args.client_cmd.format(
3409 server_uri=server_uri,
3410 stats_port=args.stats_port,
3412 fail_on_failed_rpc=fail_on_failed_rpc,
3413 rpcs_to_send=rpcs_to_send,
3414 metadata_to_send=metadata_to_send)
3415 logger.debug(
'running client: %s', client_cmd_formatted)
3416 client_cmd = shlex.split(client_cmd_formatted)
# Client output goes into the per-test sponge log file opened above.
3417 client_process = subprocess.Popen(client_cmd,
3419 stderr=subprocess.STDOUT,
3420 stdout=test_log_file)
# Dispatch to the test implementation for this case. NOTE(review): most
# call lines are garbled/missing in this extraction; only argument tails
# are visible for several branches.
3421 if test_case ==
'backends_restart':
3423 elif test_case ==
'change_backend_service':
3426 alternate_backend_service,
3427 same_zone_instance_group)
3428 elif test_case ==
'gentle_failover':
3430 secondary_zone_instance_group)
3431 elif test_case ==
'load_report_based_failover':
3433 gcp, backend_service, instance_group,
3434 secondary_zone_instance_group)
3435 elif test_case ==
'ping_pong':
3437 elif test_case ==
'remove_instance_group':
3440 same_zone_instance_group)
3441 elif test_case ==
'round_robin':
3443 elif test_case ==
'secondary_locality_gets_no_requests_on_partial_primary_failure':
3445 gcp, backend_service, instance_group,
3446 secondary_zone_instance_group)
3447 elif test_case ==
'secondary_locality_gets_requests_on_primary_failure':
3449 gcp, backend_service, instance_group,
3450 secondary_zone_instance_group)
3451 elif test_case ==
'traffic_splitting':
3453 alternate_backend_service,
3454 same_zone_instance_group)
3455 elif test_case ==
'path_matching':
3457 alternate_backend_service,
3458 same_zone_instance_group)
3459 elif test_case ==
'header_matching':
3461 alternate_backend_service,
3462 same_zone_instance_group)
3463 elif test_case ==
'circuit_breaking':
3465 same_zone_instance_group)
3466 elif test_case ==
'timeout':
3468 elif test_case ==
'fault_injection':
3470 elif test_case ==
'api_listener':
3473 alternate_backend_service)
3474 elif test_case ==
'forwarding_rule_port_match':
3476 gcp, backend_service, instance_group)
3477 elif test_case ==
'forwarding_rule_default_port':
3479 gcp, backend_service, instance_group)
3480 elif test_case ==
'metadata_filter':
3482 alternate_backend_service,
3483 same_zone_instance_group)
3484 elif test_case ==
'csds':
3485 test_csds(gcp, backend_service, instance_group, server_uri)
3487 logger.error(
'Unknown test case: %s', test_case)
# A client that has already exited (poll() is not None) before the test
# finished indicates a premature failure.
3489 if client_process
and client_process.poll()
is not None:
3491 'Client process exited prematurely with exit code %d' %
3492 client_process.returncode)
3493 result.state =
'PASSED'
3494 result.returncode = 0
# Any exception from the test marks the case FAILED and records it;
# --halt_after_fail stops the run here (handled below / outside view).
3495 except Exception
as e:
3496 logger.exception(
'Test case %s failed', test_case)
3497 failed_tests.append(test_case)
3498 result.state =
'FAILED'
3500 if args.halt_after_fail:
# Cleanup: report the client's exit code, terminate it, close the log,
# and store the result keyed by test case.
3505 if client_process.returncode:
3506 logger.info(
'Client exited with code %d' %
3507 client_process.returncode)
3509 client_process.terminate()
3510 test_log_file.close()
# Encode the message for the junit/XML report writer.
3513 result.message = result.message.encode(
'UTF-8')
3514 test_results[test_case] = [result]
3515 if args.log_client_output:
3516 logger.info(
'Client output:')
3517 with open(test_log_filename,
'r')
as client_output:
3518 logger.info(client_output.read())
# After all test cases: render the junit XML report and decide whether
# to keep the GCP resources alive.
3519 if not os.path.exists(_TEST_LOG_BASE_DIR):
3520 os.makedirs(_TEST_LOG_BASE_DIR)
3521 report_utils.render_junit_xml_report(test_results,
3525 suite_name=
'xds_tests',
3528 logger.error(
'Test case(s) %s failed', failed_tests)
# --halt_after_fail with failures implies keeping resources so the
# failure can be inspected; otherwise honor --keep_gcp_resources.
3531 keep_resources = args.keep_gcp_resources
3532 if args.halt_after_fail
and failed_tests:
3534 'Halt after fail triggered, exiting without cleaning up resources')
3535 keep_resources =
True
3536 if not keep_resources:
3537 logger.info(
'Cleaning up GCP resources. This may take some time.')