15 """Run performance tests locally or remotely."""
17 from __future__
import print_function
23 import multiprocessing
# Absolute path to the repository root: this script resolves its own location
# via sys.argv[0] and walks two directory levels up ('../..').
_ROOT = os.path.abspath(os.path.join(os.path.dirname(sys.argv[0]), '../..'))
# Default SSH username used to reach remote hosts; overridden at runtime when
# the --remote_host_username flag is supplied.
_REMOTE_HOST_USERNAME = 'jenkins'
# Timeouts, in seconds (passed to jobset.JobSpec's timeout_seconds).
_SCENARIO_TIMEOUT = 3 * 60  # running a single benchmark scenario
_WORKER_TIMEOUT = 3 * 60  # a single qps worker process
_QUIT_WORKER_TIMEOUT = 2 * 60  # the 'shutdown_workers' quit job
52 """Encapsulates a qps worker server job."""
54 def __init__(self, spec, language, host_and_port, perf_file_base_name=None):
63 newline_on_success=
True,
68 """Polls a job and returns True if given job is still running."""
82 cmdline = (language.worker_cmdline() + [
'--driver_port=%s' % port])
85 host_and_port =
'%s:%s' % (remote_host, port)
87 host_and_port =
'localhost:%s' % port
89 perf_file_base_name =
None
91 perf_file_base_name =
'%s-%s' % (host_and_port, shortname)
93 cmdline = perf_cmd + [
'-o',
'%s-perf.data' % perf_file_base_name
96 worker_timeout = _WORKER_TIMEOUT
98 user_at_host =
'%s@%s' % (_REMOTE_HOST_USERNAME, remote_host)
100 cmdline = [
'timeout',
'%s' % (worker_timeout + 30)] + cmdline
103 'cd ~/performance_workspace/grpc/ && %s' %
' '.join(cmdline)
107 jobspec = jobset.JobSpec(
112 verbose_success=
True)
113 return QpsWorkerJob(jobspec, language, host_and_port, perf_file_base_name)
119 bq_result_table=None,
121 """Runs one scenario using QPS driver."""
123 cmd =
'QPS_WORKERS="%s" ' %
','.join(workers)
125 cmd +=
'BQ_RESULT_TABLE="%s" ' % bq_result_table
126 cmd +=
'tools/run_tests/performance/run_qps_driver.sh '
127 cmd +=
'--scenarios_json=%s ' % pipes.quote(
128 json.dumps({
'scenarios': [scenario_json]}))
129 cmd +=
'--scenario_result_file=scenario_result.json '
130 if server_cpu_load != 0:
131 cmd +=
'--search_param=offered_load --initial_search_value=1000 --targeted_cpu_load=%d --stride=500 --error_tolerance=0.01' % server_cpu_load
133 user_at_host =
'%s@%s' % (_REMOTE_HOST_USERNAME, remote_host)
134 cmd =
'ssh %s "cd ~/performance_workspace/grpc/ && "%s' % (
135 user_at_host, pipes.quote(cmd))
137 return jobset.JobSpec(cmdline=[cmd],
138 shortname=
'%s' % scenario_json[
'name'],
139 timeout_seconds=_SCENARIO_TIMEOUT,
141 verbose_success=
True)
145 """Runs quit using QPS driver."""
147 cmd =
'QPS_WORKERS="%s" cmake/build/qps_json_driver --quit' %
','.join(
148 w.host_and_port
for w
in workers)
150 user_at_host =
'%s@%s' % (_REMOTE_HOST_USERNAME, remote_host)
151 cmd =
'ssh %s "cd ~/performance_workspace/grpc/ && "%s' % (
152 user_at_host, pipes.quote(cmd))
154 return jobset.JobSpec(cmdline=[cmd],
155 shortname=
'shutdown_workers',
156 timeout_seconds=_QUIT_WORKER_TIMEOUT,
158 verbose_success=
True)
163 bq_result_table=None):
164 """Runs netperf benchmark."""
165 cmd =
'NETPERF_SERVER_HOST="%s" ' % server_host
167 cmd +=
'BQ_RESULT_TABLE="%s" ' % bq_result_table
172 jenkins_job_name = os.getenv(
'KOKORO_JOB_NAME')
174 cmd +=
'KOKORO_JOB_NAME="%s" ' % jenkins_job_name
175 jenkins_build_number = os.getenv(
'KOKORO_BUILD_NUMBER')
176 if jenkins_build_number:
177 cmd +=
'KOKORO_BUILD_NUMBER="%s" ' % jenkins_build_number
179 cmd +=
'tools/run_tests/performance/run_netperf.sh'
181 user_at_host =
'%s@%s' % (_REMOTE_HOST_USERNAME, client_host)
182 cmd =
'ssh %s "cd ~/performance_workspace/grpc/ && "%s' % (
183 user_at_host, pipes.quote(cmd))
185 return jobset.JobSpec(cmdline=[cmd],
187 timeout_seconds=_NETPERF_TIMEOUT,
189 verbose_success=
True)
193 """Archives local version of repo including submodules."""
194 cmdline = [
'tar',
'-cf',
'../grpc.tar',
'../grpc/']
195 if 'java' in languages:
196 cmdline.append(
'../grpc-java')
197 if 'go' in languages:
198 cmdline.append(
'../grpc-go')
199 if 'node' in languages
or 'node_purejs' in languages:
200 cmdline.append(
'../grpc-node')
202 archive_job = jobset.JobSpec(cmdline=cmdline,
203 shortname=
'archive_repo',
204 timeout_seconds=3 * 60)
206 jobset.message(
'START',
'Archiving local repository.', do_newline=
True)
207 num_failures, _ = jobset.run([archive_job],
208 newline_on_success=
True,
210 if num_failures == 0:
211 jobset.message(
'SUCCESS',
212 'Archive with local repository created successfully.',
215 jobset.message(
'FAILED',
216 'Failed to archive local repository.',
222 """Prepares remote hosts (and maybe prepare localhost as well)."""
223 prepare_timeout = 10 * 60
226 user_at_host =
'%s@%s' % (_REMOTE_HOST_USERNAME, host)
229 cmdline=[
'tools/run_tests/performance/remote_host_prepare.sh'],
230 shortname=
'remote_host_prepare.%s' % host,
231 environ={
'USER_AT_HOST': user_at_host},
232 timeout_seconds=prepare_timeout))
237 cmdline=[
'tools/run_tests/performance/kill_workers.sh'],
238 shortname=
'local_prepare',
239 timeout_seconds=prepare_timeout))
240 jobset.message(
'START',
'Preparing hosts.', do_newline=
True)
241 num_failures, _ = jobset.run(prepare_jobs,
242 newline_on_success=
True,
244 if num_failures == 0:
245 jobset.message(
'SUCCESS',
246 'Prepare step completed successfully.',
249 jobset.message(
'FAILED',
250 'Failed to prepare remote hosts.',
256 languages=list(scenario_config.LANGUAGES.keys()),
258 """Builds performance worker on remote hosts (and maybe also locally)."""
259 build_timeout = 45 * 60
261 local_build_timeout = 60 * 60
264 user_at_host =
'%s@%s' % (_REMOTE_HOST_USERNAME, host)
267 cmdline=[
'tools/run_tests/performance/remote_host_build.sh'] +
269 shortname=
'remote_host_build.%s' % host,
271 'USER_AT_HOST': user_at_host,
274 timeout_seconds=build_timeout))
279 cmdline=[
'python',
'tools/run_tests/start_port_server.py'],
280 shortname=
'local_start_port_server',
281 timeout_seconds=2 * 60))
285 cmdline=[
'tools/run_tests/performance/build_performance.sh'] +
287 shortname=
'local_build',
288 environ={
'CONFIG':
'opt'},
289 timeout_seconds=local_build_timeout))
290 jobset.message(
'START',
'Building.', do_newline=
True)
291 num_failures, _ = jobset.run(build_jobs,
292 newline_on_success=
True,
294 if num_failures == 0:
295 jobset.message(
'SUCCESS',
'Built successfully.', do_newline=
True)
297 jobset.message(
'FAILED',
'Build failed.', do_newline=
True)
302 """Creates QPS workers (but does not start them)."""
305 workers = [(
None, 10000), (
None, 10010)]
306 elif len(worker_hosts) == 1:
308 workers = [(worker_hosts[0], 10000), (worker_hosts[0], 10010)]
311 workers = [(worker_host, 10000)
for worker_host
in worker_hosts]
315 shortname=
'qps_worker_%s_%s' %
316 (language, worker_idx),
317 port=worker[1] + language.worker_port_offset(),
318 remote_host=worker[0],
320 for language
in languages
321 for worker_idx, worker
in enumerate(workers)
326 flame_graph_reports):
327 print(
'Creating perf report collection job for %s' % worker_host)
329 if worker_host !=
'localhost':
330 user_at_host =
"%s@%s" % (_REMOTE_HOST_USERNAME, worker_host)
331 cmd =
"USER_AT_HOST=%s OUTPUT_FILENAME=%s OUTPUT_DIR=%s PERF_BASE_NAME=%s tools/run_tests/performance/process_remote_perf_flamegraphs.sh" % (
332 user_at_host, output_filename, flame_graph_reports, perf_base_name)
334 cmd =
"OUTPUT_FILENAME=%s OUTPUT_DIR=%s PERF_BASE_NAME=%s tools/run_tests/performance/process_local_perf_flamegraphs.sh" % (
335 output_filename, flame_graph_reports, perf_base_name)
337 return jobset.JobSpec(cmdline=cmd,
338 timeout_seconds=3 * 60,
340 verbose_success=
True,
341 shortname=
'process perf report')
# Groups a benchmark scenario's driver jobspec with the worker jobs it
# requires and its human-readable name.
Scenario = collections.namedtuple('Scenario', ['jobspec', 'workers', 'name'])
352 bq_result_table=None,
356 """Create jobspecs for scenarios to run."""
358 worker
for workers
in list(workers_by_lang.values())
359 for worker
in workers
365 if not netperf_hosts:
366 netperf_server =
'localhost'
367 netperf_client =
None
368 elif len(netperf_hosts) == 1:
369 netperf_server = netperf_hosts[0]
370 netperf_client = netperf_hosts[0]
372 netperf_server = netperf_hosts[0]
373 netperf_client = netperf_hosts[1]
377 client_host=netperf_client,
378 bq_result_table=bq_result_table),
379 _NO_WORKERS,
'netperf'))
381 for language
in languages:
382 for scenario_json
in language.scenarios():
383 if re.search(regex, scenario_json[
'name']):
384 categories = scenario_json.get(
'CATEGORIES',
385 [
'scalable',
'smoketest'])
386 if category
in categories
or category ==
'all':
387 workers = workers_by_lang[
str(language)][:]
390 custom_server_lang = scenario_json.get(
391 'SERVER_LANGUAGE',
None)
392 custom_client_lang = scenario_json.get(
393 'CLIENT_LANGUAGE',
None)
394 scenario_json = scenario_config.remove_nonproto_fields(
396 if custom_server_lang
and custom_client_lang:
398 'Cannot set both custom CLIENT_LANGUAGE and SERVER_LANGUAGE'
399 'in the same scenario')
400 if custom_server_lang:
401 if not workers_by_lang.get(custom_server_lang, []):
402 print(
'Warning: Skipping scenario %s as' %
403 scenario_json[
'name'])
405 'SERVER_LANGUAGE is set to %s yet the language has '
406 'not been selected with -l' %
409 for idx
in range(0, scenario_json[
'num_servers']):
411 workers[idx] = workers_by_lang[custom_server_lang][
413 if custom_client_lang:
414 if not workers_by_lang.get(custom_client_lang, []):
415 print(
'Warning: Skipping scenario %s as' %
416 scenario_json[
'name'])
418 'CLIENT_LANGUAGE is set to %s yet the language has '
419 'not been selected with -l' %
422 for idx
in range(scenario_json[
'num_servers'],
426 workers[idx] = workers_by_lang[custom_client_lang][
430 scenario_json, [w.host_and_port
for w
in workers],
431 remote_host=remote_host,
432 bq_result_table=bq_result_table,
433 server_cpu_load=server_cpu_load), workers,
434 scenario_json[
'name'])
435 scenarios.append(scenario)
441 """Waits for given jobs to finish and eventually kills them."""
444 while any(job.is_running()
for job
in jobs):
445 for job
in qpsworker_jobs:
447 print(
'QPS worker "%s" is still running.' % job.host_and_port)
449 print(
'Killing all QPS workers.')
455 print(
'All QPS workers finished.')
459 profile_output_files = []
469 flame_graph_reports):
470 perf_report_jobs = []
471 global profile_output_files
472 for host_and_port
in hosts_and_base_names:
473 perf_base_name = hosts_and_base_names[host_and_port]
474 output_filename =
'%s-%s' % (scenario_name, perf_base_name)
476 host = host_and_port.split(
':')[0]
477 profile_output_files.append(
'%s.svg' % output_filename)
478 perf_report_jobs.append(
480 flame_graph_reports))
482 jobset.message(
'START',
483 'Collecting perf reports from qps workers',
485 failures, _ = jobset.run(perf_report_jobs,
486 newline_on_success=
True,
488 jobset.message(
'SUCCESS',
489 'Collecting perf reports from qps workers',
495 argp = argparse.ArgumentParser(description=
'Run performance tests.')
496 argp.add_argument(
'-l',
499 sorted(scenario_config.LANGUAGES.keys()),
502 help=
'Languages to benchmark.')
504 '--remote_driver_host',
507 'Run QPS driver on given host. By default, QPS driver is run locally.')
508 argp.add_argument(
'--remote_worker_host',
511 help=
'Worker hosts where to start QPS workers.')
515 action=
'store_const',
517 help=
'Just list scenarios to be run, but don\'t run them.')
518 argp.add_argument(
'-r',
522 help=
'Regex to select scenarios to run.')
523 argp.add_argument(
'--bq_result_table',
526 help=
'Bigquery "dataset.table" to upload results to.')
527 argp.add_argument(
'--category',
528 choices=[
'smoketest',
'all',
'scalable',
'sweep'],
530 help=
'Select a category of tests to run.')
531 argp.add_argument(
'--netperf',
533 action=
'store_const',
535 help=
'Run netperf benchmark as one of the scenarios.')
540 help=
'Select a targeted server cpu load to run. 0 means ignore this flag'
542 argp.add_argument(
'-x',
544 default=
'report.xml',
546 help=
'Name of XML report file to generate.')
549 help=(
'Example usage: "--perf_args=record -F 99 -g". '
550 'Wrap QPS workers in a perf command '
551 'with the arguments to perf specified here. '
552 '".svg" flame graph profiles will be '
553 'created for each Qps Worker on each scenario. '
554 'Files will output to "<repo_root>/<args.flame_graph_reports>" '
555 'directory. Output files from running the worker '
556 'under perf are saved in the repo root where its ran. '
557 'Note that the perf "-g" flag is necessary for '
558 'flame graphs generation to work (assuming the binary '
559 'being profiled uses frame pointers, check out '
560 '"--call-graph dwarf" option using libunwind otherwise.) '
561 'Also note that the entire "--perf_args=<arg(s)>" must '
562 'be wrapped in quotes as in the example usage. '
563 'If the "--perg_args" is unspecified, "perf" will '
564 'not be used at all. '
565 'See http://www.brendangregg.com/perf.html '
566 'for more general perf examples.'))
568 '--skip_generate_flamegraphs',
570 action=
'store_const',
572 help=(
'Turn flame graph generation off. '
573 'May be useful if "perf_args" arguments do not make sense for '
574 'generating flamegraphs (e.g., "--perf_args=stat ...")'))
577 '--flame_graph_reports',
578 default=
'perf_reports',
581 'Name of directory to output flame graph profiles to, if any are created.'
585 '--remote_host_username',
588 help=
'Use a username that isn\'t "Jenkins" to SSH into remote workers.')
590 args = argp.parse_args()
592 global _REMOTE_HOST_USERNAME
593 if args.remote_host_username:
594 _REMOTE_HOST_USERNAME = args.remote_host_username
597 scenario_config.LANGUAGES[l]
for l
in itertools.chain.from_iterable(
598 six.iterkeys(scenario_config.LANGUAGES)
if x ==
'all' else [x]
599 for x
in args.language))
603 if args.remote_worker_host:
604 for host
in args.remote_worker_host:
605 remote_hosts.add(host)
606 if args.remote_driver_host:
607 remote_hosts.add(args.remote_driver_host)
617 if not args.remote_driver_host:
621 languages=[
str(l)
for l
in languages],
622 build_local=build_local)
626 print(
'Running workers under perf profiler')
628 perf_cmd = [
'/usr/bin/perf']
629 perf_cmd.extend(re.split(
'\s+', args.perf_args))
632 args.remote_worker_host,
636 workers_by_lang = dict([(
str(language), [])
for language
in languages])
637 for job
in qpsworker_jobs:
638 workers_by_lang[
str(job.language)].append(job)
641 workers_by_lang=workers_by_lang,
642 remote_host=args.remote_driver_host,
644 category=args.category,
645 bq_result_table=args.bq_result_table,
646 netperf=args.netperf,
647 netperf_hosts=args.remote_worker_host,
648 server_cpu_load=args.server_cpu_load)
651 raise Exception(
'No scenarios to run')
653 total_scenario_failures = 0
654 qps_workers_killed = 0
655 merged_resultset = {}
656 perf_report_failures = 0
658 for scenario
in scenarios:
662 scenario_failures = 0
664 for worker
in scenario.workers:
666 jobs = [scenario.jobspec]
673 remote_host=args.remote_driver_host))
674 scenario_failures, resultset = jobset.run(
675 jobs, newline_on_success=
True, maxjobs=1)
676 total_scenario_failures += scenario_failures
677 merged_resultset = dict(
678 itertools.chain(six.iteritems(merged_resultset),
679 six.iteritems(resultset)))
683 scenario.workers, qpsworker_jobs)
685 if perf_cmd
and scenario_failures == 0
and not args.skip_generate_flamegraphs:
686 workers_and_base_names = {}
687 for worker
in scenario.workers:
688 if not worker.perf_file_base_name:
690 'using perf buf perf report filename is unspecified'
692 workers_and_base_names[
693 worker.host_and_port] = worker.perf_file_base_name
695 workers_and_base_names, scenario.name,
696 args.flame_graph_reports)
700 if perf_cmd
and not args.skip_generate_flamegraphs:
702 report_utils.render_perf_profiling_results(
703 '%s/index.html' % args.flame_graph_reports, profile_output_files)
705 report_utils.render_junit_xml_report(merged_resultset,
707 suite_name=
'benchmarks',
710 if total_scenario_failures > 0
or qps_workers_killed > 0:
711 print(
'%s scenarios failed and %s qps worker jobs killed' %
712 (total_scenario_failures, qps_workers_killed))
715 if perf_report_failures > 0:
716 print(
'%s perf profile collection jobs failed' % perf_report_failures)
720 if __name__ ==
"__main__":