OmniSciDB  72180abbfe
run_benchmark Namespace Reference

Functions

def verify_destinations
def get_connection
def get_run_vars
def get_gpu_info
def get_machine_info
def read_query_files
def read_setup_teardown_query_files
def validate_setup_teardown_query_file
def validate_query_file
def execute_query
def calculate_query_times
def get_mem_usage
def run_query
def run_setup_teardown_query
def json_format_handler
def create_results_dataset
def send_results_db
def send_results_file_json
def send_results_jenkins_bench
def send_results_output
def process_arguments
def benchmark

Function Documentation

def run_benchmark.benchmark (   input_arguments)

Definition at line 1549 of file run_benchmark.py.

References create_results_dataset(), get_connection(), get_gpu_info(), get_machine_info(), get_run_vars(), process_arguments(), read_query_files(), read_setup_teardown_query_files(), run_query(), run_setup_teardown_query(), send_results_db(), send_results_file_json(), send_results_jenkins_bench(), send_results_output(), and verify_destinations().

1550 def benchmark(input_arguments):
1551  # Set input args to vars
1552  args = process_arguments(input_arguments)
1553  verbose = args.verbose
1554  quiet = args.quiet
1555  source_db_user = args.user
1556  source_db_passwd = args.passwd
1557  source_db_server = args.server
1558  source_db_port = args.port
1559  source_db_name = args.name
1560  source_table = args.table
1561  label = args.label
1562  queries_dir = args.queries_dir
1563  iterations = args.iterations
1564  gpu_count = args.gpu_count
1565  gpu_name = args.gpu_name
1566  no_gather_conn_gpu_info = args.no_gather_conn_gpu_info
1567  no_gather_nvml_gpu_info = args.no_gather_nvml_gpu_info
1568  gather_nvml_gpu_info = args.gather_nvml_gpu_info
1569  machine_name = args.machine_name
1570  machine_uname = args.machine_uname
1571  destinations = args.destination
1572  dest_db_user = args.dest_user
1573  dest_db_passwd = args.dest_passwd
1574  dest_db_server = args.dest_server
1575  dest_db_port = args.dest_port
1576  dest_db_name = args.dest_name
1577  dest_table = args.dest_table
1578  dest_table_schema_file = args.dest_table_schema_file
1579  output_file_json = args.output_file_json
1580  output_file_jenkins = args.output_file_jenkins
1581  output_tag_jenkins = args.output_tag_jenkins
1582  setup_teardown_queries_dir = args.setup_teardown_queries_dir
1583  run_setup_teardown_per_query = args.run_setup_teardown_per_query
1584  foreign_table_filename = args.foreign_table_filename
1585  jenkins_thresholds_name = args.jenkins_thresholds_name
1586  jenkins_thresholds_field = args.jenkins_thresholds_field
1587 
1588  # Hard-coded vars
1589  trim = 0.15
1590 
1591  # Set logging output level
1592  if verbose:
1593  logging.basicConfig(level=logging.DEBUG)
1594  elif quiet:
1595  logging.basicConfig(level=logging.WARNING)
1596  else:
1597  logging.basicConfig(level=logging.INFO)
1598 
1599  # Input validation
1600  if (iterations > 1) is not True:
1601  # Need > 1 iteration as first iteration is dropped from calculations
1602  logging.error("Iterations must be greater than 1")
1603  exit(1)
1604  if verify_destinations(
1605  destinations=destinations,
1606  dest_db_server=dest_db_server,
1607  output_file_json=output_file_json,
1608  output_file_jenkins=output_file_jenkins,
1609  ):
1610  logging.debug("Destination(s) have been verified.")
1611  else:
1612  logging.error("No valid destination(s) have been set. Exiting.")
1613  exit(1)
1614 
1615  # Establish connection to mapd db
1616  con = get_connection(
1617  db_user=source_db_user,
1618  db_passwd=source_db_passwd,
1619  db_server=source_db_server,
1620  db_port=source_db_port,
1621  db_name=source_db_name,
1622  )
1623  if not con:
1624  exit(1) # Exit if cannot connect to db
1625  # Set run-specific variables (time, uid, etc.)
1626  run_vars = get_run_vars(con=con)
1627  # Set GPU info depending on availability
1628  gpu_info = get_gpu_info(
1629  gpu_name=gpu_name,
1630  no_gather_conn_gpu_info=no_gather_conn_gpu_info,
1631  con=con,
1632  conn_machine_name=run_vars["conn_machine_name"],
1633  no_gather_nvml_gpu_info=no_gather_nvml_gpu_info,
1634  gather_nvml_gpu_info=gather_nvml_gpu_info,
1635  gpu_count=gpu_count,
1636  )
1637  # Set run machine info
1638  machine_info = get_machine_info(
1639  conn_machine_name=run_vars["conn_machine_name"],
1640  machine_name=machine_name,
1641  machine_uname=machine_uname,
1642  )
1643  # Read queries from files, set to queries dir in PWD if not passed in
1644  if not queries_dir:
1645  queries_dir = os.path.join(os.path.dirname(__file__), "queries")
1646  query_list = read_query_files(
1647  queries_dir=queries_dir, source_table=source_table
1648  )
1649  if not query_list:
1650  exit(1)
1651  # Read setup/teardown queries if they exist
1652  setup_query_list, teardown_query_list =\
1653  read_setup_teardown_query_files(queries_dir=setup_teardown_queries_dir,
1654  source_table=source_table,
1655  foreign_table_filename=foreign_table_filename)
1656  # Check at what granularity we want to run setup or teardown queries
1657  run_global_setup_queries = setup_query_list is not None and not run_setup_teardown_per_query
1658  run_per_query_setup_queries = setup_query_list is not None and run_setup_teardown_per_query
1659  run_global_teardown_queries = teardown_query_list is not None and not run_setup_teardown_per_query
1660  run_per_query_teardown_queries = teardown_query_list is not None and run_setup_teardown_per_query
1661  # Run global setup queries if they exist
1662  queries_results = []
1663  st_qr = run_setup_teardown_query(queries=setup_query_list,
1664  do_run=run_global_setup_queries, trim=trim, con=con)
1665  queries_results.extend(st_qr)
1666  # Run queries
1667  for query in query_list["queries"]:
1668  # Run setup queries
1669  st_qr = run_setup_teardown_query(
1670  queries=setup_query_list, do_run=run_per_query_setup_queries, trim=trim, con=con)
1671  queries_results.extend(st_qr)
1672  # Run benchmark query
1673  query_result = run_query(
1674  query=query, iterations=iterations, trim=trim, con=con
1675  )
1676  queries_results.append(query_result)
1677  # Run tear-down queries
1678  st_qr = run_setup_teardown_query(
1679  queries=teardown_query_list, do_run=run_per_query_teardown_queries, trim=trim, con=con)
1680  queries_results.extend(st_qr)
1681  logging.info("Completed all queries.")
1682  # Run global tear-down queries if they exist
1683  st_qr = run_setup_teardown_query(queries=teardown_query_list,
1684  do_run=run_global_teardown_queries, trim=trim, con=con)
1685  queries_results.extend(st_qr)
1686  logging.debug("Closing source db connection.")
1687  con.close()
1688  # Generate results dataset
1689  results_dataset = create_results_dataset(
1690  run_guid=run_vars["run_guid"],
1691  run_timestamp=run_vars["run_timestamp"],
1692  run_connection=run_vars["run_connection"],
1693  run_machine_name=machine_info["run_machine_name"],
1694  run_machine_uname=machine_info["run_machine_uname"],
1695  run_driver=run_vars["run_driver"],
1696  run_version=run_vars["run_version"],
1697  run_version_short=run_vars["run_version_short"],
1698  label=label,
1699  source_db_gpu_count=gpu_info["source_db_gpu_count"],
1700  source_db_gpu_driver_ver=gpu_info["source_db_gpu_driver_ver"],
1701  source_db_gpu_name=gpu_info["source_db_gpu_name"],
1702  source_db_gpu_mem=gpu_info["source_db_gpu_mem"],
1703  source_table=source_table,
1704  trim=trim,
1705  iterations=iterations,
1706  query_group=query_list["query_group"],
1707  queries_results=queries_results,
1708  )
1709  results_dataset_json = json.dumps(
1710  results_dataset, default=json_format_handler, indent=2
1711  )
1712  successful_results_dataset = [
1713  x for x in results_dataset if x["succeeded"] is not False
1714  ]
1715  successful_results_dataset_results = []
1716  for results_dataset_entry in successful_results_dataset:
1717  successful_results_dataset_results.append(
1718  results_dataset_entry["results"]
1719  )
1720  # Send results to destination(s)
1721  sent_destination = True
1722  if "mapd_db" in destinations:
1723  if not send_results_db(
1724  results_dataset=successful_results_dataset_results,
1725  table=dest_table,
1726  db_user=dest_db_user,
1727  db_passwd=dest_db_passwd,
1728  db_server=dest_db_server,
1729  db_port=dest_db_port,
1730  db_name=dest_db_name,
1731  table_schema_file=dest_table_schema_file,
1732  ):
1733  sent_destination = False
1734  if "file_json" in destinations:
1735  if not send_results_file_json(
1736  results_dataset_json=results_dataset_json,
1737  output_file_json=output_file_json,
1738  ):
1739  sent_destination = False
1740  if "jenkins_bench" in destinations:
1742  results_dataset=successful_results_dataset_results,
1743  thresholds_name=jenkins_thresholds_name,
1744  thresholds_field=jenkins_thresholds_field,
1745  output_tag_jenkins=output_tag_jenkins,
1746  output_file_jenkins=output_file_jenkins,
1747  ):
1748  sent_destination = False
1749  if "output" in destinations:
1750  if not send_results_output(results_dataset_json=results_dataset_json):
1751  sent_destination = False
1752  if not sent_destination:
1753  logging.error("Sending results to one or more destinations failed")
1754  exit(1)
1755  else:
1756  logging.info(
1757  "Succesfully loaded query results info into destination(s)"
1758  )
1759 
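
A minimal usage sketch (not part of the source) showing how benchmark() can be driven programmatically with the same flags the command line accepts. The table name and label are placeholder values, and "-e output" prints the results JSON instead of loading a destination database:

from run_benchmark import benchmark

if __name__ == "__main__":
    benchmark(
        [
            "-t", "flights_2008_10k",  # placeholder source table name
            "-l", "nightly",           # placeholder run label
            "-i", "2",                 # iterations; must be > 1
            "-e", "output",            # print results instead of loading a db
        ]
    )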

def run_benchmark.calculate_query_times (   kwargs)
  Calculates aggregate query times from all iteration times

  Kwargs:
    total_times(list): List of total time calculations
    execution_times(list): List of execution_time calculations
    results_iter_times(list): List of results_iter_time calculations
    connect_times(list): List of connect_time calculations
    trim(float): Amount to trim from iterations set to gather trimmed
                 values. Enter as decimal corresponding to percent to
                 trim - ex: 0.15 to trim 15%.

  Returns:
    query_execution(dict): Query times
    False(bool): The query failed. Exception should be logged.

Definition at line 523 of file run_benchmark.py.

Referenced by create_results_dataset().

524 def calculate_query_times(**kwargs):
525  """
526  Calculates aggregate query times from all iteration times
527 
528  Kwargs:
529  total_times(list): List of total time calculations
530  execution_times(list): List of execution_time calculations
531  results_iter_times(list): List of results_iter_time calculations
532  connect_times(list): List of connect_time calculations
533  trim(float): Amount to trim from iterations set to gather trimmed
534  values. Enter as decimal corresponding to percent to
535  trim - ex: 0.15 to trim 15%.
536 
537  Returns:
538  query_execution(dict): Query times
539  False(bool): The query failed. Exception should be logged.
540  """
541  trim_size = int(kwargs["trim"] * len(kwargs["total_times"]))
542  return {
543  "total_time_avg": round(numpy.mean(kwargs["total_times"]), 1),
544  "total_time_min": round(numpy.min(kwargs["total_times"]), 1),
545  "total_time_max": round(numpy.max(kwargs["total_times"]), 1),
546  "total_time_85": round(numpy.percentile(kwargs["total_times"], 85), 1),
547  "total_time_trimmed_avg": round(
548  numpy.mean(
549  numpy.sort(kwargs["total_times"])[trim_size:-trim_size]
550  ),
551  1,
552  )
553  if trim_size
554  else round(numpy.mean(kwargs["total_times"]), 1),
555  "total_times": kwargs["total_times"],
556  "execution_time_avg": round(numpy.mean(kwargs["execution_times"]), 1),
557  "execution_time_min": round(numpy.min(kwargs["execution_times"]), 1),
558  "execution_time_max": round(numpy.max(kwargs["execution_times"]), 1),
559  "execution_time_85": round(
560  numpy.percentile(kwargs["execution_times"], 85), 1
561  ),
562  "execution_time_25": round(
563  numpy.percentile(kwargs["execution_times"], 25), 1
564  ),
565  "execution_time_std": round(numpy.std(kwargs["execution_times"]), 1),
566  "execution_time_trimmed_avg": round(
567  numpy.mean(
568  numpy.sort(kwargs["execution_times"])[trim_size:-trim_size]
569  )
570  )
571  if trim_size > 0
572  else round(numpy.mean(kwargs["execution_times"]), 1),
573  "execution_time_trimmed_max": round(
574  numpy.max(
575  numpy.sort(kwargs["execution_times"])[trim_size:-trim_size]
576  )
577  )
578  if trim_size > 0
579  else round(numpy.max(kwargs["execution_times"]), 1),
580  "execution_times": kwargs["execution_times"],
581  "connect_time_avg": round(numpy.mean(kwargs["connect_times"]), 1),
582  "connect_time_min": round(numpy.min(kwargs["connect_times"]), 1),
583  "connect_time_max": round(numpy.max(kwargs["connect_times"]), 1),
584  "connect_time_85": round(
585  numpy.percentile(kwargs["connect_times"], 85), 1
586  ),
587  "results_iter_time_avg": round(
588  numpy.mean(kwargs["results_iter_times"]), 1
589  ),
590  "results_iter_time_min": round(
591  numpy.min(kwargs["results_iter_times"]), 1
592  ),
593  "results_iter_time_max": round(
594  numpy.max(kwargs["results_iter_times"]), 1
595  ),
596  "results_iter_time_85": round(
597  numpy.percentile(kwargs["results_iter_times"], 85), 1
598  ),
599  }
600 
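
A small worked example (values invented) of the trimming logic above: with trim=0.15 and ten samples, trim_size = int(0.15 * 10) = 1, so the sorted list loses its single lowest and highest values before the trimmed average is taken:

import numpy

total_times = [90, 100, 101, 102, 103, 104, 105, 106, 107, 200]
trim_size = int(0.15 * len(total_times))        # 1
trimmed = numpy.sort(total_times)[trim_size:-trim_size]
print(round(numpy.mean(trimmed), 1))            # 103.5; outliers 90 and 200 are dropped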


def run_benchmark.create_results_dataset (   kwargs)
  Create results dataset

  Kwargs:
    run_guid(str): Run GUID
    run_timestamp(datetime): Run timestamp
    run_connection(str): Connection string
    run_machine_name(str): Run machine name
    run_machine_uname(str): Run machine uname
    run_driver(str): Run driver
    run_version(str): Version of DB
    run_version_short(str): Shortened version of DB
    label(str): Run label
    source_db_gpu_count(int): Number of GPUs on run machine
    source_db_gpu_driver_ver(str): GPU driver version
    source_db_gpu_name(str): GPU name
    source_db_gpu_mem(str): Amount of GPU mem on run machine
    source_table(str): Table to run query against
    trim(float): Trim decimal to remove from top and bottom of results
    iterations(int): Number of iterations of each query to run
    query_group(str): Query group, usually matches table name
    queries_results(list of dict):::
        query_name(str): Name of query
        query_mapdql(str): Query to run
        query_id(str): Query ID
        query_succeeded(bool): Query succeeded
        query_error_info(str): Query error info
        result_count(int): Number of results returned
        initial_iteration_results(dict):::
            first_execution_time(float): Execution time for first query
                iteration
            first_connect_time(float):  Connect time for first query
                iteration
            first_results_iter_time(float): Results iteration time for
                first query iteration
            first_total_time(float): Total time for first iteration
            first_cpu_mem_usage(float): CPU memory usage for first query
                iteration
            first_gpu_mem_usage(float): GPU memory usage for first query
                iteration
        noninitial_iteration_results(list):::
            execution_time(float): Time (in ms) that pymapd reports
                backend spent on query.
            connect_time(float): Time (in ms) for overhead of query,
                calculated by subtracting backend execution time from
                time spent on the execution function.
            results_iter_time(float): Time (in ms) it took for
                pymapd.fetchone() to iterate through all of the results.
            total_time(float): Time (in ms) from adding all above times.
        query_total_elapsed_time(int): Total elapsed time for query

  Returns:
    results_dataset(list):::
        result_dataset(dict): Query results dataset

Definition at line 884 of file run_benchmark.py.

References calculate_query_times().

Referenced by benchmark().

885 def create_results_dataset(**kwargs):
886  """
887  Create results dataset
888 
889  Kwargs:
890  run_guid(str): Run GUID
891  run_timestamp(datetime): Run timestamp
892  run_connection(str): Connection string
893  run_machine_name(str): Run machine name
894  run_machine_uname(str): Run machine uname
895  run_driver(str): Run driver
896  run_version(str): Version of DB
897  run_version_short(str): Shortened version of DB
898  label(str): Run label
899  source_db_gpu_count(int): Number of GPUs on run machine
900  source_db_gpu_driver_ver(str): GPU driver version
901  source_db_gpu_name(str): GPU name
902  source_db_gpu_mem(str): Amount of GPU mem on run machine
903  source_table(str): Table to run query against
904  trim(float): Trim decimal to remove from top and bottom of results
905  iterations(int): Number of iterations of each query to run
906  query_group(str): Query group, usually matches table name
907  queries_results(list of dict):::
908  query_name(str): Name of query
909  query_mapdql(str): Query to run
910  query_id(str): Query ID
911  query_succeeded(bool): Query succeeded
912  query_error_info(str): Query error info
913  result_count(int): Number of results returned
914  initial_iteration_results(dict):::
915  first_execution_time(float): Execution time for first query
916  iteration
917  first_connect_time(float): Connect time for first query
918  iteration
919  first_results_iter_time(float): Results iteration time for
920  first query iteration
921  first_total_time(float): Total time for first iteration
922  first_cpu_mem_usage(float): CPU memory usage for first query
923  iteration
924  first_gpu_mem_usage(float): GPU memory usage for first query
925  iteration
926  noninitial_iteration_results(list):::
927  execution_time(float): Time (in ms) that pymapd reports
928  backend spent on query.
929  connect_time(float): Time (in ms) for overhead of query,
930  calculated by subtracting backend execution time from
931  time spent on the execution function.
932  results_iter_time(float): Time (in ms) it took for
933  pymapd.fetchone() to iterate through all of the results.
934  total_time(float): Time (in ms) from adding all above times.
935  query_total_elapsed_time(int): Total elapsed time for query
936 
937  Returns:
938  results_dataset(list):::
939  result_dataset(dict): Query results dataset
940  """
941  results_dataset = []
942  for query_results in kwargs["queries_results"]:
943  if query_results["query_succeeded"]:
944  # Aggregate iteration values
945  execution_times, connect_times, results_iter_times, total_times = (
946  [],
947  [],
948  [],
949  [],
950  )
951  detailed_timing_last_iteration = {}
952  if len(query_results["noninitial_iteration_results"]) == 0:
953  # A single query run (most likely a setup or teardown query)
954  initial_result = query_results["initial_iteration_results"]
955  execution_times.append(initial_result["first_execution_time"])
956  connect_times.append(initial_result["first_connect_time"])
957  results_iter_times.append(
958  initial_result["first_results_iter_time"]
959  )
960  total_times.append(initial_result["first_total_time"])
961  # Special case
962  result_count = 1
963  else:
964  # More than one query run
965  for noninitial_result in query_results[
966  "noninitial_iteration_results"
967  ]:
968  execution_times.append(noninitial_result["execution_time"])
969  connect_times.append(noninitial_result["connect_time"])
970  results_iter_times.append(
971  noninitial_result["results_iter_time"]
972  )
973  total_times.append(noninitial_result["total_time"])
974  # Overwrite result count, same for each iteration
975  result_count = noninitial_result["result_count"]
976 
977  # If available, get the last iteration's component-wise timing information as a json structure
978  if (
979  query_results["noninitial_iteration_results"][-1]["debug_info"]
980  ):
981  detailed_timing_last_iteration = json.loads(
982  query_results["noninitial_iteration_results"][-1][
983  "debug_info"
984  ]
985  )["timer"]
986  # Calculate query times
987  logging.debug(
988  "Calculating times from query " + query_results["query_id"]
989  )
990  query_times = calculate_query_times(
991  total_times=total_times,
992  execution_times=execution_times,
993  connect_times=connect_times,
994  results_iter_times=results_iter_times,
995  trim=kwargs[
996  "trim"
997  ], # Trim top and bottom n% for trimmed calculations
998  )
999  result_dataset = {
1000  "name": query_results["query_name"],
1001  "mapdql": query_results["query_mapdql"],
1002  "succeeded": True,
1003  "results": {
1004  "run_guid": kwargs["run_guid"],
1005  "run_timestamp": kwargs["run_timestamp"],
1006  "run_connection": kwargs["run_connection"],
1007  "run_machine_name": kwargs["run_machine_name"],
1008  "run_machine_uname": kwargs["run_machine_uname"],
1009  "run_driver": kwargs["run_driver"],
1010  "run_version": kwargs["run_version"],
1011  "run_version_short": kwargs["run_version_short"],
1012  "run_label": kwargs["label"],
1013  "run_gpu_count": kwargs["source_db_gpu_count"],
1014  "run_gpu_driver_ver": kwargs["source_db_gpu_driver_ver"],
1015  "run_gpu_name": kwargs["source_db_gpu_name"],
1016  "run_gpu_mem_mb": kwargs["source_db_gpu_mem"],
1017  "run_table": kwargs["source_table"],
1018  "query_group": kwargs["query_group"],
1019  "query_id": query_results["query_id"],
1020  "query_result_set_count": result_count,
1021  "query_error_info": query_results["query_error_info"],
1022  "query_conn_first": query_results[
1023  "initial_iteration_results"
1024  ]["first_connect_time"],
1025  "query_conn_avg": query_times["connect_time_avg"],
1026  "query_conn_min": query_times["connect_time_min"],
1027  "query_conn_max": query_times["connect_time_max"],
1028  "query_conn_85": query_times["connect_time_85"],
1029  "query_exec_first": query_results[
1030  "initial_iteration_results"
1031  ]["first_execution_time"],
1032  "query_exec_avg": query_times["execution_time_avg"],
1033  "query_exec_min": query_times["execution_time_min"],
1034  "query_exec_max": query_times["execution_time_max"],
1035  "query_exec_85": query_times["execution_time_85"],
1036  "query_exec_25": query_times["execution_time_25"],
1037  "query_exec_stdd": query_times["execution_time_std"],
1038  "query_exec_trimmed_avg": query_times[
1039  "execution_time_trimmed_avg"
1040  ],
1041  "query_exec_trimmed_max": query_times[
1042  "execution_time_trimmed_max"
1043  ],
1044  # Render queries not supported yet
1045  "query_render_first": None,
1046  "query_render_avg": None,
1047  "query_render_min": None,
1048  "query_render_max": None,
1049  "query_render_85": None,
1050  "query_render_25": None,
1051  "query_render_stdd": None,
1052  "query_total_first": query_results[
1053  "initial_iteration_results"
1054  ]["first_total_time"],
1055  "query_total_avg": query_times["total_time_avg"],
1056  "query_total_min": query_times["total_time_min"],
1057  "query_total_max": query_times["total_time_max"],
1058  "query_total_85": query_times["total_time_85"],
1059  "query_total_all": query_results[
1060  "query_total_elapsed_time"
1061  ],
1062  "query_total_trimmed_avg": query_times[
1063  "total_time_trimmed_avg"
1064  ],
1065  "results_iter_count": kwargs["iterations"],
1066  "results_iter_first": query_results[
1067  "initial_iteration_results"
1068  ]["first_results_iter_time"],
1069  "results_iter_avg": query_times["results_iter_time_avg"],
1070  "results_iter_min": query_times["results_iter_time_min"],
1071  "results_iter_max": query_times["results_iter_time_max"],
1072  "results_iter_85": query_times["results_iter_time_85"],
1073  "cpu_mem_usage_mb": query_results[
1074  "initial_iteration_results"
1075  ]["first_cpu_mem_usage"],
1076  "gpu_mem_usage_mb": query_results[
1077  "initial_iteration_results"
1078  ]["first_gpu_mem_usage"],
1079  },
1080  "debug": {
1081  "query_exec_times": query_times["execution_times"],
1082  "query_total_times": query_times["total_times"],
1083  "detailed_timing_last_iteration": detailed_timing_last_iteration,
1084  },
1085  }
1086  elif not query_results["query_succeeded"]:
1087  result_dataset = {
1088  "name": query_results["query_name"],
1089  "mapdql": query_results["query_mapdql"],
1090  "succeeded": False,
1091  }
1092  results_dataset.append(result_dataset)
1093  logging.debug("All values set for query " + query_results["query_id"])
1094  return results_dataset
1095 
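
A minimal sketch (input values invented) of how benchmark() consumes this return value: entries whose "succeeded" flag is False carry no "results" key, so they are filtered out before anything is sent to a destination:

results_dataset = [
    {"name": "q01.sql", "succeeded": True, "results": {"query_exec_avg": 12.3}},
    {"name": "q02.sql", "succeeded": False},
]
successful = [x for x in results_dataset if x["succeeded"] is not False]
payload = [entry["results"] for entry in successful]
print(payload)  # [{'query_exec_avg': 12.3}]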

def run_benchmark.execute_query (   kwargs)
  Executes a query against the connected db using pymapd
  https://pymapd.readthedocs.io/en/latest/usage.html#querying

  Kwargs:
    query_name(str): Name of query
    query_mapdql(str): Query to run
    iteration(int): Iteration number
    con(class): Connection class

  Returns:
    query_execution(dict):::
      result_count(int): Number of results returned
      execution_time(float): Time (in ms) that pymapd reports
                             backend spent on query.
      connect_time(float): Time (in ms) for overhead of query, calculated
                           by subtracting backend execution time
                           from time spent on the execution function.
      results_iter_time(float): Time (in ms) it took for
                                pymapd.fetchone() to iterate through all
                                of the results.
      total_time(float): Time (in ms) from adding all above times.
    False(bool): The query failed. Exception should be logged.

Definition at line 441 of file run_benchmark.py.

Referenced by run_query().

442 def execute_query(**kwargs):
443  """
444  Executes a query against the connected db using pymapd
445  https://pymapd.readthedocs.io/en/latest/usage.html#querying
446 
447  Kwargs:
448  query_name(str): Name of query
449  query_mapdql(str): Query to run
450  iteration(int): Iteration number
451  con(class): Connection class
452 
453  Returns:
454  query_execution(dict):::
455  result_count(int): Number of results returned
456  execution_time(float): Time (in ms) that pymapd reports
457  backend spent on query.
458  connect_time(float): Time (in ms) for overhead of query, calculated
459  by subtracting backend execution time
460  from time spent on the execution function.
461  results_iter_time(float): Time (in ms) it took for
462  pymapd.fetchone() to iterate through all
463  of the results.
464  total_time(float): Time (in ms) from adding all above times.
465  False(bool): The query failed. Exception should be logged.
466  """
467  start_time = timeit.default_timer()
468  try:
469  # Run the query
470  query_result = kwargs["con"].execute(kwargs["query_mapdql"])
471  logging.debug(
472  "Completed iteration "
473  + str(kwargs["iteration"])
474  + " of query "
475  + kwargs["query_name"]
476  )
477  except (pymapd.exceptions.ProgrammingError, pymapd.exceptions.Error):
478  logging.exception(
479  "Error running query "
480  + kwargs["query_name"]
481  + " during iteration "
482  + str(kwargs["iteration"])
483  )
484  return False
485 
486  # Calculate times
487  query_elapsed_time = (timeit.default_timer() - start_time) * 1000
488  execution_time = query_result._result.execution_time_ms
489  debug_info = query_result._result.debug
490  connect_time = round((query_elapsed_time - execution_time), 1)
491  # Iterate through each result from the query
492  logging.debug(
493  "Counting results from query"
494  + kwargs["query_name"]
495  + " iteration "
496  + str(kwargs["iteration"])
497  )
498  result_count = 0
499  start_time = timeit.default_timer()
500  while query_result.fetchone():
501  result_count += 1
502  results_iter_time = round(
503  ((timeit.default_timer() - start_time) * 1000), 1
504  )
505  query_execution = {
506  "result_count": result_count,
507  "execution_time": execution_time,
508  "connect_time": connect_time,
509  "results_iter_time": results_iter_time,
510  "total_time": execution_time + connect_time + results_iter_time,
511  "debug_info": debug_info,
512  }
513  logging.debug(
514  "Execution results for query"
515  + kwargs["query_name"]
516  + " iteration "
517  + str(kwargs["iteration"])
518  + ": "
519  + str(query_execution)
520  )
521  return query_execution
522 
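
Illustrative arithmetic (all values invented) for the timing fields computed above: connect_time is the measured wall-clock time around con.execute() minus the backend-reported execution time, and total_time is the sum of the three components:

query_elapsed_time = 250.0  # ms, wall-clock time around con.execute()
execution_time = 180.0      # ms, reported by the backend
connect_time = round(query_elapsed_time - execution_time, 1)    # 70.0
results_iter_time = 15.0    # ms, wall-clock time around the fetchone() loop
total_time = execution_time + connect_time + results_iter_time  # 265.0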


def run_benchmark.get_connection (   kwargs)
  Connects to the db using pymapd
  https://pymapd.readthedocs.io/en/latest/usage.html#connecting

  Kwargs:
    db_user(str): DB username
    db_passwd(str): DB password
    db_server(str): DB host
    db_port(int): DB port
    db_name(str): DB name

  Returns:
    con(class): Connection class
    False(bool): The connection failed. Exception should be logged.

Definition at line 61 of file run_benchmark.py.

Referenced by run_benchmark_arrow.benchmark(), benchmark(), and send_results_db().

61 
62 def get_connection(**kwargs):
63  """
64  Connects to the db using pymapd
65  https://pymapd.readthedocs.io/en/latest/usage.html#connecting
66 
67  Kwargs:
68  db_user(str): DB username
69  db_passwd(str): DB password
70  db_server(str): DB host
71  db_port(int): DB port
72  db_name(str): DB name
73 
74  Returns:
75  con(class): Connection class
76  False(bool): The connection failed. Exception should be logged.
77  """
78  try:
79  logging.debug("Connecting to mapd db...")
80  con = pymapd.connect(
81  user=kwargs["db_user"],
82  password=kwargs["db_passwd"],
83  host=kwargs["db_server"],
84  port=kwargs["db_port"],
85  dbname=kwargs["db_name"],
86  )
87  logging.info("Succesfully connected to mapd db")
88  return con
89  except (pymapd.exceptions.OperationalError, pymapd.exceptions.Error):
90  logging.exception("Error connecting to database.")
91  return False
92 
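
A usage sketch with the script's default credentials (the server details are assumptions; adjust for your environment):

con = get_connection(
    db_user="mapd",
    db_passwd="HyperInteractive",
    db_server="localhost",
    db_port=6274,
    db_name="mapd",
)
if not con:
    raise SystemExit("Connection failed; see the logged exception.")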


def run_benchmark.get_gpu_info (   kwargs)
  Gets run machine GPU info

  Kwargs:
    gpu_name(str): GPU name from input param
    no_gather_conn_gpu_info(bool): Do not gather GPU info fields using pymapd connection info
    con(class 'pymapd.connection.Connection'): Mapd connection
    conn_machine_name(str): Name of run machine
    no_gather_nvml_gpu_info(bool): Do not gather GPU info using nvml
    gather_nvml_gpu_info(bool): Gather GPU info using nvml
    gpu_count(int): Number of GPUs on run machine

  Returns:
    gpu_info(dict):::
        conn_gpu_count(int): Number of GPUs gathered from pymapd con
        source_db_gpu_count(int): Number of GPUs on run machine
        source_db_gpu_mem(str): Amount of GPU mem on run machine
        source_db_gpu_driver_ver(str): GPU driver version
        source_db_gpu_name(str): GPU name

Definition at line 134 of file run_benchmark.py.

Referenced by run_benchmark_arrow.benchmark(), and benchmark().

135 def get_gpu_info(**kwargs):
136  """
137  Gets run machine GPU info
138 
139  Kwargs:
140  gpu_name(str): GPU name from input param
141  no_gather_conn_gpu_info(bool): Do not gather GPU info fields using pymapd connection info
142  con(class 'pymapd.connection.Connection'): Mapd connection
143  conn_machine_name(str): Name of run machine
144  no_gather_nvml_gpu_info(bool): Do not gather GPU info using nvml
145  gather_nvml_gpu_info(bool): Gather GPU info using nvml
146  gpu_count(int): Number of GPUs on run machine
147 
148  Returns:
149  gpu_info(dict):::
150  conn_gpu_count(int): Number of GPUs gathered from pymapd con
151  source_db_gpu_count(int): Number of GPUs on run machine
152  source_db_gpu_mem(str): Amount of GPU mem on run machine
153  source_db_gpu_driver_ver(str): GPU driver version
154  source_db_gpu_name(str): GPU name
155  """
156  # Set GPU info fields
157  conn_gpu_count = None
158  source_db_gpu_count = None
159  source_db_gpu_mem = None
160  source_db_gpu_driver_ver = ""
161  source_db_gpu_name = ""
162  if kwargs["no_gather_conn_gpu_info"]:
163  logging.debug(
164  "--no-gather-conn-gpu-info passed, "
165  + "using blank values for source database GPU info fields "
166  + "[run_gpu_count, run_gpu_mem_mb] "
167  )
168  else:
169  logging.debug(
170  "Gathering source database GPU info fields "
171  + "[run_gpu_count, run_gpu_mem_mb] "
172  + "using pymapd connection info. "
173  )
174  conn_hardware_info = kwargs["con"]._client.get_hardware_info(
175  kwargs["con"]._session
176  )
177  conn_gpu_count = conn_hardware_info.hardware_info[0].num_gpu_allocated
178  if conn_gpu_count == 0 or conn_gpu_count is None:
179  no_gather_nvml_gpu_info = True
180  if conn_gpu_count == 0:
181  logging.warning(
182  "0 GPUs detected from connection info, "
183  + "using blank values for source database GPU info fields "
184  + "If running against cpu-only server, make sure to set "
185  + "--no-gather-nvml-gpu-info and --no-gather-conn-gpu-info."
186  )
187  else:
188  no_gather_nvml_gpu_info = kwargs["no_gather_nvml_gpu_info"]
189  source_db_gpu_count = conn_gpu_count
190  try:
191  source_db_gpu_mem = int(
192  conn_hardware_info.hardware_info[0].gpu_info[0].memory
193  / 1000000
194  )
195  except IndexError:
196  logging.error("GPU memory info not available from connection.")
197  if no_gather_nvml_gpu_info:
198  logging.debug(
199  "--no-gather-nvml-gpu-info passed, "
200  + "using blank values for source database GPU info fields "
201  + "[gpu_driver_ver, run_gpu_name] "
202  )
203  elif (
204  kwargs["conn_machine_name"] == "localhost"
205  or kwargs["gather_nvml_gpu_info"]
206  ):
207  logging.debug(
208  "Gathering source database GPU info fields "
209  + "[gpu_driver_ver, run_gpu_name] "
210  + "from local GPU using pynvml. "
211  )
212  import pynvml
213 
214  pynvml.nvmlInit()
215  source_db_gpu_driver_ver = pynvml.nvmlSystemGetDriverVersion().decode()
216  for i in range(source_db_gpu_count):
217  handle = pynvml.nvmlDeviceGetHandleByIndex(i)
218  # Assume all cards are the same, overwrite name value
219  source_db_gpu_name = pynvml.nvmlDeviceGetName(handle).decode()
220  pynvml.nvmlShutdown()
221  # If gpu_count argument passed in, override gathered value
222  if kwargs["gpu_count"]:
223  source_db_gpu_count = kwargs["gpu_count"]
224  if kwargs["gpu_name"]:
225  source_db_gpu_name = kwargs["gpu_name"]
226  gpu_info = {
227  "conn_gpu_count": conn_gpu_count,
228  "source_db_gpu_count": source_db_gpu_count,
229  "source_db_gpu_mem": source_db_gpu_mem,
230  "source_db_gpu_driver_ver": source_db_gpu_driver_ver,
231  "source_db_gpu_name": source_db_gpu_name,
232  }
233  return gpu_info
234 
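
A standalone sketch of the pynvml calls used above, handy for checking what the function would report on a given machine; it requires the pynvml package and an NVIDIA driver. Note that older pynvml releases return bytes (hence the .decode() calls in the listing), while newer ones return str:

import pynvml

pynvml.nvmlInit()
driver_ver = pynvml.nvmlSystemGetDriverVersion()
handle = pynvml.nvmlDeviceGetHandleByIndex(0)  # first GPU
gpu_name = pynvml.nvmlDeviceGetName(handle)
pynvml.nvmlShutdown()
print(driver_ver, gpu_name)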


def run_benchmark.get_machine_info (   kwargs)
  Gets run machine name and uname info

  Kwargs:
    conn_machine_name(str): Name of machine from pymapd con
    machine_name(str): Name of machine if passed in
    machine_uname(str): Uname of machine if passed in

  Returns:
    machine_info(dict):::
        run_machine_name(str): Run machine name
        run_machine_uname(str): Run machine uname

Definition at line 235 of file run_benchmark.py.

References join().

Referenced by run_benchmark_arrow.benchmark(), and benchmark().

236 def get_machine_info(**kwargs):
237  """
238  Gets run machine name and uname info
239 
240  Kwargs:
241  conn_machine_name(str): Name of machine from pymapd con
242  machine_name(str): Name of machine if passed in
243  machine_uname(str): Uname of machine if passed in
244 
245  Returns:
246  machine_info(dict):::
247  run_machine_name(str): Run machine name
248  run_machine_uname(str): Run machine uname
249  """
250  # Set machine names, using local info if connected to localhost
251  if kwargs["conn_machine_name"] == "localhost":
252  local_uname = os.uname()
253  # If --machine-name passed in, override pymapd con value
254  if kwargs["machine_name"]:
255  run_machine_name = kwargs["machine_name"]
256  else:
257  if kwargs["conn_machine_name"] == "localhost":
258  run_machine_name = local_uname.nodename.split(".")[0]
259  else:
260  run_machine_name = kwargs["conn_machine_name"]
261  # If --machine-uname passed in, override pymapd con value
262  if kwargs["machine_uname"]:
263  run_machine_uname = kwargs["machine_uname"]
264  else:
265  if kwargs["conn_machine_name"] == "localhost":
266  run_machine_uname = " ".join(local_uname)
267  else:
268  run_machine_uname = ""
269  machine_info = {
270  "run_machine_name": run_machine_name,
271  "run_machine_uname": run_machine_uname,
272  }
273  return machine_info
274 

def run_benchmark.get_mem_usage (   kwargs)
  Calculates memory statistics from mapd_server _client.get_memory call

  Kwargs:
    con(class 'pymapd.connection.Connection'): Mapd connection
    mem_type(str): [gpu, cpu] Type of memory to gather metrics for

  Returns:
    ramusage(dict):::
      usedram(float): Amount of memory (in MB) used
      freeram(float): Amount of memory (in MB) free
      totalallocated(float): Total amount of memory (in MB) allocated
      errormessage(str): Error if returned by get_memory call
      rawdata(list): Raw data returned from get_memory call

Definition at line 601 of file run_benchmark.py.

Referenced by run_benchmark_arrow.run_query(), and run_query().

602 def get_mem_usage(**kwargs):
603  """
604  Calculates memory statistics from mapd_server _client.get_memory call
605 
606  Kwargs:
607  con(class 'pymapd.connection.Connection'): Mapd connection
608  mem_type(str): [gpu, cpu] Type of memory to gather metrics for
609 
610  Returns:
611  ramusage(dict):::
612  usedram(float): Amount of memory (in MB) used
613  freeram(float): Amount of memory (in MB) free
614  totalallocated(float): Total amount of memory (in MB) allocated
615  errormessage(str): Error if returned by get_memory call
616  rawdata(list): Raw data returned from get_memory call
617  """
618  try:
619  con_mem_data_list = kwargs["con"]._client.get_memory(
620  session=kwargs["con"]._session, memory_level=kwargs["mem_type"]
621  )
622  usedram = 0
623  freeram = 0
624  for con_mem_data in con_mem_data_list:
625  page_size = con_mem_data.page_size
626  node_memory_data_list = con_mem_data.node_memory_data
627  for node_memory_data in node_memory_data_list:
628  ram = node_memory_data.num_pages * page_size
629  is_free = node_memory_data.is_free
630  if is_free:
631  freeram += ram
632  else:
633  usedram += ram
634  totalallocated = usedram + freeram
635  if totalallocated > 0:
636  totalallocated = round(totalallocated / 1024 / 1024, 1)
637  usedram = round(usedram / 1024 / 1024, 1)
638  freeram = round(freeram / 1024 / 1024, 1)
639  ramusage = {}
640  ramusage["usedram"] = usedram
641  ramusage["freeram"] = freeram
642  ramusage["totalallocated"] = totalallocated
643  ramusage["errormessage"] = ""
644  except Exception as e:
645  errormessage = "Get memory failed with error: " + str(e)
646  logging.error(errormessage)
647  ramusage["errormessage"] = errormessage
648  return ramusage
649 
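
Illustrative page math (values invented) for the accumulation loop above: each node entry contributes num_pages * page_size bytes to either the free or the used total, and totals are converted to MB by dividing by 1024 twice:

page_size = 512        # bytes per page, from con_mem_data.page_size
num_pages = 4_194_304  # 4M pages * 512 B = 2 GiB for this entry
ram_bytes = num_pages * page_size
ram_mb = round(ram_bytes / 1024 / 1024, 1)
print(ram_mb)          # 2048.0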


def run_benchmark.get_run_vars (   kwargs)
  Gets/sets run-specific vars such as time, uid, etc.

  Kwargs:
    con(class 'pymapd.connection.Connection'): Mapd connection

  Returns:
    run_vars(dict):::
        run_guid(str): Run GUID
        run_timestamp(datetime): Run timestamp
        run_connection(str): Connection string
        run_driver(str): Run driver
        run_version(str): Version of DB
        run_version_short(str): Shortened version of DB
        conn_machine_name(str): Name of run machine

Definition at line 93 of file run_benchmark.py.

Referenced by run_benchmark_arrow.benchmark(), and benchmark().

93 
94 def get_run_vars(**kwargs):
95  """
96  Gets/sets run-specific vars such as time, uid, etc.
97 
98  Kwargs:
99  con(class 'pymapd.connection.Connection'): Mapd connection
100 
101  Returns:
102  run_vars(dict):::
103  run_guid(str): Run GUID
104  run_timestamp(datetime): Run timestamp
105  run_connection(str): Connection string
106  run_driver(str): Run driver
107  run_version(str): Version of DB
108  run_version_short(str): Shortened version of DB
109  conn_machine_name(str): Name of run machine
110  """
111  run_guid = str(uuid.uuid4())
112  logging.debug("Run guid: " + run_guid)
113  run_timestamp = datetime.datetime.now()
114  run_connection = str(kwargs["con"])
115  logging.debug("Connection string: " + run_connection)
116  run_driver = "" # TODO
117  run_version = kwargs["con"]._client.get_version()
118  if "-" in run_version:
119  run_version_short = run_version.split("-")[0]
120  else:
121  run_version_short = run_version
122  conn_machine_name = re.search(r"@(.*?):", run_connection).group(1)
123  run_vars = {
124  "run_guid": run_guid,
125  "run_timestamp": run_timestamp,
126  "run_connection": run_connection,
127  "run_driver": run_driver,
128  "run_version": run_version,
129  "run_version_short": run_version_short,
130  "conn_machine_name": conn_machine_name,
131  }
132  return run_vars
133 
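
A sketch of the conn_machine_name extraction: the pymapd connection string has roughly the shape shown below (an assumption for illustration), and the regex captures the host between "@" and the following ":":

import re

run_connection = "Connection(mapd://mapd:***@localhost:6274/mapd)"  # assumed shape
conn_machine_name = re.search(r"@(.*?):", run_connection).group(1)
print(conn_machine_name)  # localhost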


def run_benchmark.json_format_handler (   x)

Definition at line 875 of file run_benchmark.py.

876 def json_format_handler(x):
877  # Function to allow json to deal with datetime and numpy int
878  if isinstance(x, datetime.datetime):
879  return x.isoformat()
880  if isinstance(x, numpy.int64):
881  return int(x)
882  raise TypeError("Unknown type")
883 
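
Usage sketch (assuming json_format_handler above is in scope): json.dumps() invokes the handler only for types it cannot serialize natively, such as the run timestamp and numpy integer counters:

import datetime
import json
import numpy

payload = {"run_timestamp": datetime.datetime.now(), "count": numpy.int64(42)}
print(json.dumps(payload, default=json_format_handler, indent=2))
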
def run_benchmark.process_arguments (   input_arguments)

Definition at line 1278 of file run_benchmark.py.

Referenced by benchmark().

1279 def process_arguments(input_arguments):
1280  # Parse input parameters
1281  parser = ArgumentParser()
1282  optional = parser._action_groups.pop()
1283  required = parser.add_argument_group("required arguments")
1284  parser._action_groups.append(optional)
1285  optional.add_argument(
1286  "-v", "--verbose", action="store_true", help="Turn on debug logging"
1287  )
1288  optional.add_argument(
1289  "-q",
1290  "--quiet",
1291  action="store_true",
1292  help="Suppress script outuput " + "(except warnings and errors)",
1293  )
1294  required.add_argument(
1295  "-u",
1296  "--user",
1297  dest="user",
1298  default="mapd",
1299  help="Source database user",
1300  )
1301  required.add_argument(
1302  "-p",
1303  "--passwd",
1304  dest="passwd",
1305  default="HyperInteractive",
1306  help="Source database password",
1307  )
1308  required.add_argument(
1309  "-s",
1310  "--server",
1311  dest="server",
1312  default="localhost",
1313  help="Source database server hostname",
1314  )
1315  optional.add_argument(
1316  "-o",
1317  "--port",
1318  dest="port",
1319  type=int,
1320  default=6274,
1321  help="Source database server port",
1322  )
1323  required.add_argument(
1324  "-n",
1325  "--name",
1326  dest="name",
1327  default="mapd",
1328  help="Source database name",
1329  )
1330  required.add_argument(
1331  "-t",
1332  "--table",
1333  dest="table",
1334  required=True,
1335  help="Source db table name",
1336  )
1337  required.add_argument(
1338  "-l",
1339  "--label",
1340  dest="label",
1341  required=True,
1342  help="Benchmark run label",
1343  )
1344  required.add_argument(
1345  "-d",
1346  "--queries-dir",
1347  dest="queries_dir",
1348  help='Absolute path to dir with query files. \
1349  [Default: "queries" dir in same location as script]',
1350  )
1351  required.add_argument(
1352  "-i",
1353  "--iterations",
1354  dest="iterations",
1355  type=int,
1356  required=True,
1357  help="Number of iterations per query. Must be > 1",
1358  )
1359  optional.add_argument(
1360  "-g",
1361  "--gpu-count",
1362  dest="gpu_count",
1363  type=int,
1364  default=None,
1365  help="Number of GPUs. Not required when gathering local gpu info",
1366  )
1367  optional.add_argument(
1368  "-G",
1369  "--gpu-name",
1370  dest="gpu_name",
1371  type=str,
1372  default="",
1373  help="Name of GPU(s). Not required when gathering local gpu info",
1374  )
1375  optional.add_argument(
1376  "--no-gather-conn-gpu-info",
1377  dest="no_gather_conn_gpu_info",
1378  action="store_true",
1379  help="Do not gather source database GPU info fields "
1380  + "[run_gpu_count, run_gpu_mem_mb] "
1381  + "using pymapd connection info. "
1382  + "Use when testing a CPU-only server.",
1383  )
1384  optional.add_argument(
1385  "--no-gather-nvml-gpu-info",
1386  dest="no_gather_nvml_gpu_info",
1387  action="store_true",
1388  help="Do not gather source database GPU info fields "
1389  + "[gpu_driver_ver, run_gpu_name] "
1390  + "from local GPU using pynvml. "
1391  + 'Defaults to True when source server is not "localhost". '
1392  + "Use when testing a CPU-only server.",
1393  )
1394  optional.add_argument(
1395  "--gather-nvml-gpu-info",
1396  dest="gather_nvml_gpu_info",
1397  action="store_true",
1398  help="Gather source database GPU info fields "
1399  + "[gpu_driver_ver, run_gpu_name] "
1400  + "from local GPU using pynvml. "
1401  + 'Defaults to True when source server is "localhost". '
1402  + "Only use when benchmarking against same machine that this script "
1403  + "is run from.",
1404  )
1405  optional.add_argument(
1406  "-m",
1407  "--machine-name",
1408  dest="machine_name",
1409  help="Name of source machine",
1410  )
1411  optional.add_argument(
1412  "-a",
1413  "--machine-uname",
1414  dest="machine_uname",
1415  help="Uname info from " + "source machine",
1416  )
1417  optional.add_argument(
1418  "-e",
1419  "--destination",
1420  dest="destination",
1421  default="mapd_db",
1422  help="Destination type: [mapd_db, file_json, output, jenkins_bench] "
1423  + "Multiple values can be input seperated by commas, "
1424  + 'ex: "mapd_db,file_json"',
1425  )
1426  optional.add_argument(
1427  "-U",
1428  "--dest-user",
1429  dest="dest_user",
1430  default="mapd",
1431  help="Destination mapd_db database user",
1432  )
1433  optional.add_argument(
1434  "-P",
1435  "--dest-passwd",
1436  dest="dest_passwd",
1437  default="HyperInteractive",
1438  help="Destination mapd_db database password",
1439  )
1440  optional.add_argument(
1441  "-S",
1442  "--dest-server",
1443  dest="dest_server",
1444  help="Destination mapd_db database server hostname"
1445  + ' (required if destination = "mapd_db")',
1446  )
1447  optional.add_argument(
1448  "-O",
1449  "--dest-port",
1450  dest="dest_port",
1451  type=int,
1452  default=6274,
1453  help="Destination mapd_db database server port",
1454  )
1455  optional.add_argument(
1456  "-N",
1457  "--dest-name",
1458  dest="dest_name",
1459  default="mapd",
1460  help="Destination mapd_db database name",
1461  )
1462  optional.add_argument(
1463  "-T",
1464  "--dest-table",
1465  dest="dest_table",
1466  default="results",
1467  help="Destination mapd_db table name",
1468  )
1469  optional.add_argument(
1470  "-C",
1471  "--dest-table-schema-file",
1472  dest="dest_table_schema_file",
1473  default="results_table_schemas/query-results.sql",
1474  help="Destination table schema file. This must be an executable "
1475  + "CREATE TABLE statement that matches the output of this script. It "
1476  + "is required when creating the results table. Default location is "
1477  + 'in "./results_table_schemas/query-results.sql"',
1478  )
1479  optional.add_argument(
1480  "-j",
1481  "--output-file-json",
1482  dest="output_file_json",
1483  help="Absolute path of .json output file "
1484  + '(required if destination = "file_json")',
1485  )
1486  optional.add_argument(
1487  "-J",
1488  "--output-file-jenkins",
1489  dest="output_file_jenkins",
1490  help="Absolute path of jenkins benchmark .json output file "
1491  + '(required if destination = "jenkins_bench")',
1492  )
1493  optional.add_argument(
1494  "-E",
1495  "--output-tag-jenkins",
1496  dest="output_tag_jenkins",
1497  default="",
1498  help="Jenkins benchmark result tag. "
1499  + 'Optional, appended to table name in "group" field',
1500  )
1501  optional.add_argument(
1502  "--setup-teardown-queries-dir",
1503  dest="setup_teardown_queries_dir",
1504  type=str,
1505  default=None,
1506  help='Absolute path to dir with setup & teardown query files. '
1507  'Query files with "setup" in the filename will be executed in '
1508  'the setup stage, likewise query files with "teardown" in '
1509  'the filename will be executed in the tear-down stage. Queries '
1510  'execute in lexical order. [Default: None, meaning this option is '
1511  'not used.]',
1512  )
1513  optional.add_argument(
1514  "--run-setup-teardown-per-query",
1515  dest="run_setup_teardown_per_query",
1516  action="store_true",
1517  help='Run setup & teardown steps per query. '
1518  'If set, setup-teardown-queries-dir must be specified. '
1519  'If not set, but setup-teardown-queries-dir is specified '
1520  'setup & tear-down queries will run globally, that is, '
1521  'once per script invocation.'
1522  ' [Default: False]'
1523  )
1524  optional.add_argument(
1525  "-F",
1526  "--foreign-table-filename",
1527  dest="foreign_table_filename",
1528  default=None,
1529  help="Path to file containing template for import query. "
1530  "Path must be relative to the FOREIGN SERVER. "
1531  "Occurances of \"##FILE##\" within setup/teardown queries will be"
1532  " replaced with this. "
1533  )
1534  optional.add_argument(
1535  "--jenkins-thresholds-name",
1536  dest="jenkins_thresholds_name",
1537  default="average",
1538  help="Name of Jenkins output field.",
1539  )
1540  optional.add_argument(
1541  "--jenkins-thresholds-field",
1542  dest="jenkins_thresholds_field",
1543  default="query_exec_trimmed_avg",
1544  help="Field to report as jenkins output value.",
1545  )
1546  args = parser.parse_args(args=input_arguments)
1547  return args
1548 
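
A minimal call sketch: despite the "required arguments" group, only --table, --label, and --iterations lack defaults, so this is the smallest valid invocation; the remaining attributes come back with the defaults shown in the listing:

args = process_arguments(["-t", "flights_2008_10k", "-l", "nightly", "-i", "2"])
print(args.user, args.port, args.destination)  # mapd 6274 mapd_db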


def run_benchmark.read_query_files (   kwargs)
  Reads query files from a directory into a query list

  Kwargs:
    queries_dir(str): Directory with query files
    source_table(str): Table to run query against

  Returns:
    query_list(dict):::
        query_group(str): Query group, usually matches table name
        queries(list)
            query(dict):::
                name(str): Name of query
                mapdql(str): Query syntax to run
    False(bool): Unable to find queries dir

Definition at line 275 of file run_benchmark.py.

References File_Namespace.append(), File_Namespace.open(), split(), and validate_query_file().

Referenced by run_benchmark_arrow.benchmark(), benchmark(), and read_setup_teardown_query_files().

276 def read_query_files(**kwargs):
277  """
278  Reads query files from a directory into a query list
279 
280  Kwargs:
281  queries_dir(str): Directory with query files
282  source_table(str): Table to run query against
283 
284  Returns:
285  query_list(dict):::
286  query_group(str): Query group, usually matches table name
287  queries(list)
288  query(dict):::
289  name(str): Name of query
290  mapdql(str): Query syntax to run
291  False(bool): Unable to find queries dir
292  """
293  # Read query files contents and write to query_list
294  query_list = {"query_group": "", "queries": []}
295  query_group = kwargs["queries_dir"].split("/")[-1]
296  query_list.update(query_group=query_group)
297  logging.debug("Queries dir: " + kwargs["queries_dir"])
298  try:
299  for query_filename in sorted(os.listdir(kwargs["queries_dir"])):
300  logging.debug("Validating query filename: " + query_filename)
301  if validate_query_file(query_filename=query_filename):
302  with open(
303  kwargs["queries_dir"] + "/" + query_filename, "r"
304  ) as query_filepath:
305  logging.debug(
306  "Reading query with filename: " + query_filename
307  )
308  query_mapdql = query_filepath.read().replace("\n", " ")
309  query_mapdql = query_mapdql.replace(
310  "##TAB##", kwargs["source_table"]
311  )
312  query_list["queries"].append(
313  {"name": query_filename, "mapdql": query_mapdql}
314  )
315  logging.info("Read all query files")
316  return query_list
317  except FileNotFoundError:
318  logging.exception("Could not find queries directory.")
319  return False
320 
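
A worked example (table name invented) of the ##TAB## substitution above: every occurrence of the placeholder in a query file is rewritten against the source table before the query is queued:

query_mapdql = "SELECT COUNT(*) FROM ##TAB##;"
query_mapdql = query_mapdql.replace("##TAB##", "flights_2008_10k")
print(query_mapdql)  # SELECT COUNT(*) FROM flights_2008_10k;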


def run_benchmark.read_setup_teardown_query_files (   kwargs)
  Get queries to run for setup and teardown from directory

  Kwargs:
    queries_dir(str): Directory with query files
    source_table(str): Table to run query against
    foreign_table_filename(str): File to create foreign table from

  Returns:
    setup_queries(query_list): List of setup queries
    teardown_queries(query_list): List of teardown queries
    False(bool): Unable to find queries dir

query_list is described by:
query_list(dict):::
    query_group(str): Query group, usually matches table name
    queries(list)
        query(dict):::
            name(str): Name of query
            mapdql(str): Query syntax to run

Definition at line 321 of file run_benchmark.py.

References read_query_files(), and validate_setup_teardown_query_file().

Referenced by benchmark().

322 def read_setup_teardown_query_files(**kwargs):
323  """
324  Get queries to run for setup and teardown from directory
325 
326  Kwargs:
327  queries_dir(str): Directory with query files
328  source_table(str): Table to run query against
329  foreign_table_filename(str): File to create foreign table from
330 
331  Returns:
332  setup_queries(query_list): List of setup queries
333  teardown_queries(query_list): List of teardown queries
334  False(bool): Unable to find queries dir
335 
336  query_list is described by:
337  query_list(dict):::
338  query_group(str): Query group, usually matches table name
339  queries(list)
340  query(dict):::
341  name(str): Name of query
342  mapdql(str): Query syntax to run
343  """
344  setup_teardown_queries_dir = kwargs['queries_dir']
345  source_table = kwargs['source_table']
346  # Read setup/tear-down queries if they exist
347  setup_teardown_query_list = None
348  if setup_teardown_queries_dir is not None:
349  setup_teardown_query_list = read_query_files(
350  queries_dir=setup_teardown_queries_dir,
351  source_table=source_table
352  )
353  if kwargs["foreign_table_filename"] is not None:
354  for query in setup_teardown_query_list['queries']:
355  query['mapdql'] = query['mapdql'].replace(
356  "##FILE##", kwargs["foreign_table_filename"])
357  # Filter setup queries
358  setup_query_list = None
359  if setup_teardown_query_list is not None:
360  setup_query_list = filter(
361  lambda x: validate_setup_teardown_query_file(
362  query_filename=x['name'], check_which='setup', quiet=True),
363  setup_teardown_query_list['queries'])
364  setup_query_list = list(setup_query_list)
365  # Filter teardown queries
366  teardown_query_list = None
367  if setup_teardown_query_list is not None:
368  teardown_query_list = filter(
369  lambda x: validate_setup_teardown_query_file(
370  query_filename=x['name'], check_which='teardown', quiet=True),
371  setup_teardown_query_list['queries'])
372  teardown_query_list = list(teardown_query_list)
373  return setup_query_list, teardown_query_list
374 
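
A usage sketch (directory and table name assumed): the function returns two lists of query dicts, or (None, None) when no setup/teardown directory is configured:

setup_queries, teardown_queries = read_setup_teardown_query_files(
    queries_dir="setup_teardown",     # assumed path
    source_table="flights_2008_10k",  # assumed table
    foreign_table_filename=None,      # no ##FILE## substitution
)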

def run_benchmark.run_query (   kwargs)
  Takes query name, syntax, and iteration count and calls the
    execute_query function for each iteration. Reports total, iteration,
    and exec timings, memory usage, and failure status.

  Kwargs:
    query(dict):::
        name(str): Name of query
        mapdql(str): Query syntax to run
    iterations(int): Number of iterations of each query to run
    trim(float): Trim decimal to remove from top and bottom of results
    con(class 'pymapd.connection.Connection'): Mapd connection

  Returns:
    query_results(dict):::
        query_name(str): Name of query
        query_mapdql(str): Query to run
        query_id(str): Query ID
        query_succeeded(bool): Query succeeded
        query_error_info(str): Query error info
        result_count(int): Number of results returned
        initial_iteration_results(dict):::
            first_execution_time(float): Execution time for first query
                iteration
            first_connect_time(float):  Connect time for first query
                iteration
            first_results_iter_time(float): Results iteration time for
                first query iteration
            first_total_time(float): Total time for first iteration
            first_cpu_mem_usage(float): CPU memory usage for first query
                iteration
            first_gpu_mem_usage(float): GPU memory usage for first query
                iteration
        noninitial_iteration_results(list):::
            execution_time(float): Time (in ms) that pymapd reports
                backend spent on query.
            connect_time(float): Time (in ms) for overhead of query,
                calculated by subtracting backend execution time from
                time spent on the execution function.
            results_iter_time(float): Time (in ms) it took for
                pymapd.fetchone() to iterate through all of the results.
            total_time(float): Total time (in ms), the sum of the above times.
        query_total_elapsed_time(int): Total elapsed time for query
    False(bool): The query failed. Exception should be logged.

Definition at line 650 of file run_benchmark.py.

References execute_query(), and get_mem_usage().

Referenced by benchmark(), and run_setup_teardown_query().

651 def run_query(**kwargs):
652  """
653  Takes query name, syntax, and iteration count and calls the
654  execute_query function for each iteration. Reports total, iteration,
655  and exec timings, memory usage, and failure status.
656 
657  Kwargs:
658  query(dict):::
659  name(str): Name of query
660  mapdql(str): Query syntax to run
661  iterations(int): Number of iterations of each query to run
662  trim(float): Decimal fraction of results to trim from the top and
663  bottom of the timings
663  con(class 'pymapd.connection.Connection'): Mapd connection
664 
665  Returns:
666  query_results(dict):::
667  query_name(str): Name of query
668  query_mapdql(str): Query to run
669  query_id(str): Query ID
670  query_succeeded(bool): Query succeeded
671  query_error_info(str): Query error info
672  result_count(int): Number of results returned
673  initial_iteration_results(dict):::
674  first_execution_time(float): Execution time for first query
675  iteration
676  first_connect_time(float): Connect time for first query
677  iteration
678  first_results_iter_time(float): Results iteration time for
679  first query iteration
680  first_total_time(float): Total time for first iteration
681  first_cpu_mem_usage(float): CPU memory usage for first query
682  iteration
683  first_gpu_mem_usage(float): GPU memory usage for first query
684  iteration
685  noninitial_iteration_results(list):::
686  execution_time(float): Time (in ms) that pymapd reports
687  backend spent on query.
688  connect_time(float): Time (in ms) for overhead of query,
689  calculated by subtracting backend execution time from
690  time spent on the execution function.
691  results_iter_time(float): Time (in ms) it took for
692  pymapd.fetchone() to iterate through all of the results.
693  total_time(float): Total time (in ms), the sum of the above times.
694  query_total_elapsed_time(int): Total elapsed time for query
695  False(bool): The query failed. Exception should be logged.
696  """
697  logging.info(
698  "Running query: "
699  + kwargs["query"]["name"]
700  + " iterations: "
701  + str(kwargs["iterations"])
702  )
703  query_id = kwargs["query"]["name"].rsplit(".")[
704  0
705  ] # Query ID = filename without extension
706  query_results = {
707  "query_name": kwargs["query"]["name"],
708  "query_mapdql": kwargs["query"]["mapdql"],
709  "query_id": query_id,
710  "query_succeeded": True,
711  "query_error_info": "",
712  "initial_iteration_results": {},
713  "noninitial_iteration_results": [],
714  "query_total_elapsed_time": 0,
715  }
716  query_total_start_time = timeit.default_timer()
717  # Run iterations of query
718  for iteration in range(kwargs["iterations"]):
719  # Gather memory before running query iteration
720  logging.debug("Getting pre-query memory usage on CPU")
721  pre_query_cpu_mem_usage = get_mem_usage(
722  con=kwargs["con"], mem_type="cpu"
723  )
724  logging.debug("Getting pre-query memory usage on GPU")
725  pre_query_gpu_mem_usage = get_mem_usage(
726  con=kwargs["con"], mem_type="gpu"
727  )
728  # Run query iteration
729  logging.debug(
730  "Running iteration "
731  + str(iteration)
732  + " of query "
733  + kwargs["query"]["name"]
734  )
735  query_result = execute_query(
736  query_name=kwargs["query"]["name"],
737  query_mapdql=kwargs["query"]["mapdql"],
738  iteration=iteration,
739  con=kwargs["con"],
740  )
741  # Gather memory after running query iteration
742  logging.debug("Getting post-query memory usage on CPU")
743  post_query_cpu_mem_usage = get_mem_usage(
744  con=kwargs["con"], mem_type="cpu"
745  )
746  logging.debug("Getting post-query memory usage on GPU")
747  post_query_gpu_mem_usage = get_mem_usage(
748  con=kwargs["con"], mem_type="gpu"
749  )
750  # Calculate total (post minus pre) memory usage after query iteration
751  query_cpu_mem_usage = round(
752  post_query_cpu_mem_usage["usedram"]
753  - pre_query_cpu_mem_usage["usedram"],
754  1,
755  )
756  query_gpu_mem_usage = round(
757  post_query_gpu_mem_usage["usedram"]
758  - pre_query_gpu_mem_usage["usedram"],
759  1,
760  )
761  if query_result:
762  query_results.update(
763  query_error_info="" # TODO - interpret query error info
764  )
765  # Assign first query iteration times
766  if iteration == 0:
767  first_execution_time = round(query_result["execution_time"], 1)
768  first_connect_time = round(query_result["connect_time"], 1)
769  first_results_iter_time = round(
770  query_result["results_iter_time"], 1
771  )
772  first_total_time = (
773  first_execution_time
774  + first_connect_time
775  + first_results_iter_time
776  )
777  query_results.update(
778  initial_iteration_results={
779  "first_execution_time": first_execution_time,
780  "first_connect_time": first_connect_time,
781  "first_results_iter_time": first_results_iter_time,
782  "first_total_time": first_total_time,
783  "first_cpu_mem_usage": query_cpu_mem_usage,
784  "first_gpu_mem_usage": query_gpu_mem_usage,
785  }
786  )
787  else:
788  # Put noninitial iterations into query_result list
789  query_results["noninitial_iteration_results"].append(
790  query_result
791  )
792  # Verify no change in memory for noninitial iterations
793  if query_cpu_mem_usage != 0.0:
794  logging.error(
795  (
796  "Noninitial iteration ({0}) of query ({1}) "
797  + "shows non-zero CPU memory usage: {2}"
798  ).format(
799  iteration,
800  kwargs["query"]["name"],
801  query_cpu_mem_usage,
802  )
803  )
804  if query_gpu_mem_usage != 0.0:
805  logging.error(
806  (
807  "Noninitial iteration ({0}) of query ({1}) "
808  + "shows non-zero GPU memory usage: {2}"
809  ).format(
810  iteration,
811  kwargs["query"]["name"],
812  query_gpu_mem_usage,
813  )
814  )
815  else:
816  logging.warning(
817  "Error detected during execution of query: "
818  + kwargs["query"]["name"]
819  + ". This query will be skipped and "
820  + "times will not reported"
821  )
822  query_results.update(query_succeeded=False)
823  break
824  # Calculate time for all iterations to run
825  query_total_elapsed_time = round(
826  ((timeit.default_timer() - query_total_start_time) * 1000), 1
827  )
828  query_results.update(query_total_elapsed_time=query_total_elapsed_time)
829  logging.info(
830  "Completed all iterations of query " + kwargs["query"]["name"]
831  )
832  return query_results
833 
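As a rough standalone sketch (not part of the script), the first-iteration versus noninitial-iteration bookkeeping above can be exercised with a stubbed execute_query() and no database connection:

    import timeit

    def fake_execute_query(iteration):
        # Hypothetical stand-in for execute_query(); times are in ms.
        return {"execution_time": 10.0 + iteration, "connect_time": 1.0,
                "results_iter_time": 2.0}

    query_results = {"initial_iteration_results": {},
                     "noninitial_iteration_results": []}
    query_total_start_time = timeit.default_timer()
    for iteration in range(3):
        result = fake_execute_query(iteration)
        if iteration == 0:
            # The first iteration is reported separately (and later dropped
            # from the averaged timings).
            query_results["initial_iteration_results"] = {
                "first_total_time": round(result["execution_time"]
                                          + result["connect_time"]
                                          + result["results_iter_time"], 1)
            }
        else:
            query_results["noninitial_iteration_results"].append(result)
    query_results["query_total_elapsed_time"] = round(
        (timeit.default_timer() - query_total_start_time) * 1000, 1)
    print(query_results)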

def run_benchmark.run_setup_teardown_query (   kwargs)
    Convenience wrapper around `run_query` to run a setup or 
    teardown query

  Kwargs:
    queries(query_list): List of queries to run
    do_run(bool): If true will run query, otherwise do nothing
    trim(float): Decimal fraction of results to trim from the top and
        bottom of the timings
    con(class 'pymapd.connection.Connection'): Mapd connection

  Returns:
    See return value for `run_query`

    query_list is described by:
    queries(list)
        query(dict):::
            name(str): Name of query
            mapdql(str): Query syntax to run
            [setup : queries(list)]
            [teardown : queries(list)]

Definition at line 834 of file run_benchmark.py.

References run_query().

Referenced by benchmark().

835 def run_setup_teardown_query(**kwargs):
836  """
837  Convenience wrapper around `run_query` to run a setup or
838  teardown query
839 
840  Kwargs:
841  queries(query_list): List of queries to run
842  do_run(bool): If true will run query, otherwise do nothing
843  trim(float): Decimal fraction of results to trim from the top and
844  bottom of the timings
844  con(class 'pymapd.connection.Connection'): Mapd connection
845 
846  Returns:
847  See return value for `run_query`
848 
849  query_list is described by:
850  queries(list)
851  query(dict):::
852  name(str): Name of query
853  mapdql(str): Query syntax to run
854  [setup : queries(list)]
855  [teardown : queries(list)]
856  """
857  query_results = list()
858  if kwargs['do_run']:
859  for query in kwargs['queries']:
860  result = run_query(
861  query=query, iterations=1,
862  trim=kwargs['trim'],
863  con=kwargs['con']
864  )
865  if not result['query_succeeded']:
866  logging.warning(
867  "Error setup or teardown query: "
868  + query["name"]
869  + ". did not complete."
870  )
871  else:
872  query_results.append(result)
873  return query_results
874 
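A minimal sketch of the do_run guard-and-collect pattern above, with run_query() replaced by a hypothetical stub so no pymapd connection is needed:

    def fake_run_query(**kwargs):
        # Hypothetical stub standing in for run_query().
        return {"query_succeeded": True, "query_name": kwargs["query"]["name"]}

    def run_all(queries, do_run):
        query_results = []
        if do_run:  # same guard as run_setup_teardown_query()
            for query in queries:
                result = fake_run_query(query=query, iterations=1)
                if result["query_succeeded"]:
                    query_results.append(result)
        return query_results

    print(run_all([{"name": "setup_create_table.sql"}], do_run=True))
    print(run_all([{"name": "setup_create_table.sql"}], do_run=False))  # []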

def run_benchmark.send_results_db (   kwargs)
  Send results dataset to a database using pymapd

  Kwargs:
    results_dataset(list):::
        result_dataset(dict): Query results dataset
    table(str): Results destination table name
    db_user(str): Results destination user name
    db_passwd(str): Results destination password
    db_server(str): Results destination server address
    db_port(int): Results destination server port
    db_name(str): Results destination database name
    table_schema_file(str): Path to destination database schema file

  Returns:
    True(bool): Sending results to destination database succeeded
    False(bool): Sending results to destination database failed. Exception
        should be logged.

Definition at line 1096 of file run_benchmark.py.

References get_connection().

Referenced by run_benchmark_arrow.benchmark(), and benchmark().

1097 def send_results_db(**kwargs):
1098  """
1099  Send results dataset to a database using pymapd
1100 
1101  Kwargs:
1102  results_dataset(list):::
1103  result_dataset(dict): Query results dataset
1104  table(str): Results destination table name
1105  db_user(str): Results destination user name
1106  db_passwd(str): Results destination password
1107  db_server(str): Results destination server address
1108  db_port(int): Results destination server port
1109  db_name(str): Results destination database name
1110  table_schema_file(str): Path to destination database schema file
1111 
1112  Returns:
1113  True(bool): Sending results to destination database succeeded
1114  False(bool): Sending results to destination database failed. Exception
1115  should be logged.
1116  """
1117  # Create dataframe from list of query results
1118  logging.debug("Converting results list to pandas dataframe")
1119  results_df = DataFrame(kwargs["results_dataset"])
1120  # Establish connection to destination db
1121  logging.debug("Connecting to destination db")
1122  dest_con = get_connection(
1123  db_user=kwargs["db_user"],
1124  db_passwd=kwargs["db_passwd"],
1125  db_server=kwargs["db_server"],
1126  db_port=kwargs["db_port"],
1127  db_name=kwargs["db_name"],
1128  )
1129  if not dest_con:
1130  logging.exception("Could not connect to destination db.")
1131  return False
1132  # Load results into db, creating table if it does not exist
1133  tables = dest_con.get_tables()
1134  if kwargs["table"] not in tables:
1135  logging.info("Destination table does not exist. Creating.")
1136  try:
1137  with open(kwargs["table_schema_file"], "r") as table_schema:
1138  logging.debug(
1139  "Reading table_schema_file: " + kwargs["table_schema_file"]
1140  )
1141  create_table_sql = table_schema.read().replace("\n", " ")
1142  create_table_sql = create_table_sql.replace(
1143  "##TAB##", kwargs["table"]
1144  )
1145  except FileNotFoundError:
1146  logging.exception("Could not find destination table_schema_file.")
1147  return False
1148  try:
1149  logging.debug("Executing create destination table query")
1150  dest_con.execute(create_table_sql)
1151  logging.debug("Destination table created.")
1152  except (pymapd.exceptions.ProgrammingError, pymapd.exceptions.Error):
1153  logging.exception("Error running destination table creation")
1154  return False
1155  logging.info("Loading results into destination db")
1156  try:
1157  dest_con.load_table_columnar(
1158  kwargs["table"],
1159  results_df,
1160  preserve_index=False,
1161  chunk_size_bytes=0,
1162  col_names_from_schema=True,
1163  )
1164  except (pymapd.exceptions.ProgrammingError, pymapd.exceptions.Error):
1165  logging.exception("Error loading results into destination db")
1166  dest_con.close()
1167  return False
1168  dest_con.close()
1169  return True
1170 
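The ##TAB## schema templating above can be shown in isolation; the schema text and table name below are hypothetical:

    import os, tempfile

    # Hypothetical schema file; send_results_db() reads table_schema_file and
    # substitutes the destination table name for the ##TAB## placeholder.
    schema_path = os.path.join(tempfile.gettempdir(), "results_schema.sql")
    with open(schema_path, "w") as f:
        f.write("CREATE TABLE ##TAB## (\n  query_id TEXT,\n  total_time FLOAT\n);")

    with open(schema_path, "r") as table_schema:
        create_table_sql = table_schema.read().replace("\n", " ")
        create_table_sql = create_table_sql.replace("##TAB##", "benchmark_results")
    print(create_table_sql)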

def run_benchmark.send_results_file_json (   kwargs)
  Send results dataset to a local json file

  Kwargs:
    results_dataset_json(str): Json-formatted query results dataset
    output_file_json (str): Location of .json file output

  Returns:
    True(bool): Sending results to json file succeeded
    False(bool): Sending results to json file failed. Exception
        should be logged.

Definition at line 1171 of file run_benchmark.py.

Referenced by run_benchmark_arrow.benchmark(), and benchmark().

1172 def send_results_file_json(**kwargs):
1173  """
1174  Send results dataset to a local json file
1175 
1176  Kwargs:
1177  results_dataset_json(str): Json-formatted query results dataset
1178  output_file_json (str): Location of .json file output
1179 
1180  Returns:
1181  True(bool): Sending results to json file succeeded
1182  False(bool): Sending results to json file failed. Exception
1183  should be logged.
1184  """
1185  try:
1186  logging.debug("Opening json output file for writing")
1187  with open(kwargs["output_file_json"], "w") as file_json_open:
1188  logging.info(
1189  "Writing to output json file: " + kwargs["output_file_json"]
1190  )
1191  file_json_open.write(kwargs["results_dataset_json"])
1192  return True
1193  except IOError:
1194  logging.exception("Error writing results to json output file")
1195  return False
1196 
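A self-contained sketch of the same write-with-error-handling pattern, using a hypothetical results payload and a temp-directory output path:

    import json, logging, os, tempfile

    # Hypothetical results payload, already JSON-encoded as in the script.
    results_dataset_json = json.dumps([{"query_id": "query_01",
                                        "query_total_elapsed_time": 1234.5}])
    output_file_json = os.path.join(tempfile.gettempdir(), "benchmark_results.json")

    try:
        with open(output_file_json, "w") as file_json_open:
            file_json_open.write(results_dataset_json)
    except IOError:
        logging.exception("Error writing results to json output file")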

def run_benchmark.send_results_jenkins_bench (   kwargs)
  Send results dataset to a local json file formatted for use with jenkins
    benchmark plugin: https://github.com/jenkinsci/benchmark-plugin

  Kwargs:
    results_dataset(list):::
        result_dataset(dict): Query results dataset
    thresholds_name(str): Name to use for Jenkins result field
    thresholds_field(str): Field to use for query threshold in jenkins
    output_tag_jenkins(str): Jenkins benchmark result tag, for different
        sets from same table
    output_file_jenkins (str): Location of .json jenkins file output

  Returns:
    True(bool): Sending results to json file succeeded
    False(bool): Sending results to json file failed. Exception
        should be logged.

Definition at line 1197 of file run_benchmark.py.

Referenced by run_benchmark_arrow.benchmark(), and benchmark().

1198 def send_results_jenkins_bench(**kwargs):
1199  """
1200  Send results dataset to a local json file formatted for use with jenkins
1201  benchmark plugin: https://github.com/jenkinsci/benchmark-plugin
1202 
1203  Kwargs:
1204  results_dataset(list):::
1205  result_dataset(dict): Query results dataset
1206  thresholds_name(str): Name to use for Jenkins result field
1207  thresholds_field(str): Field to use for query threshold in jenkins
1208  output_tag_jenkins(str): Jenkins benchmark result tag, for different
1209  sets from same table
1210  output_file_jenkins (str): Location of .json jenkins file output
1211 
1212  Returns:
1213  True(bool): Sending results to json file succeeded
1214  False(bool): Sending results to json file failed. Exception
1215  should be logged.
1216  """
1217  jenkins_bench_results = []
1218  for result_dataset in kwargs["results_dataset"]:
1219  logging.debug("Constructing output for jenkins benchmark plugin")
1220  jenkins_bench_results.append(
1221  {
1222  "name": result_dataset["query_id"],
1223  "description": "",
1224  "parameters": [],
1225  "results": [
1226  {
1227  "name": result_dataset["query_id"]
1228  + "_"
1229  + kwargs["thresholds_name"],
1230  "description": "",
1231  "unit": "ms",
1232  "dblValue": result_dataset[kwargs["thresholds_field"]],
1233  }
1234  ],
1235  }
1236  )
1237  jenkins_bench_json = json.dumps(
1238  {
1239  "groups": [
1240  {
1241  "name": result_dataset["run_table"]
1242  + kwargs["output_tag_jenkins"],
1243  "description": "Source table: "
1244  + result_dataset["run_table"],
1245  "tests": jenkins_bench_results,
1246  }
1247  ]
1248  }
1249  )
1250  try:
1251  logging.debug("Opening jenkins_bench json output file for writing")
1252  with open(kwargs["output_file_jenkins"], "w") as file_jenkins_open:
1253  logging.info(
1254  "Writing to jenkins_bench json file: "
1255  + kwargs["output_file_jenkins"]
1256  )
1257  file_jenkins_open.write(jenkins_bench_json)
1258  return True
1259  except IOError:
1260  logging.exception("Error writing results to jenkins json output file")
1261  return False
1262 
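The shape of the Jenkins benchmark-plugin JSON built above, reproduced with a hypothetical one-query dataset:

    import json

    # Hypothetical one-query dataset and threshold settings.
    result_dataset = {"query_id": "query_01", "run_table": "flights",
                      "query_total_elapsed_time": 1234.5}
    thresholds_name, thresholds_field = "total", "query_total_elapsed_time"

    jenkins_bench_json = json.dumps({"groups": [{
        "name": result_dataset["run_table"],
        "description": "Source table: " + result_dataset["run_table"],
        "tests": [{
            "name": result_dataset["query_id"],
            "description": "",
            "parameters": [],
            "results": [{
                "name": result_dataset["query_id"] + "_" + thresholds_name,
                "description": "",
                "unit": "ms",
                "dblValue": result_dataset[thresholds_field],
            }],
        }],
    }]}, indent=2)
    print(jenkins_bench_json)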

def run_benchmark.send_results_output (   kwargs)
  Send results dataset script output

  Kwargs:
    results_dataset_json(str): Json-formatted query results dataset

  Returns:
    True(bool): Sending results to output succeeded

Definition at line 1263 of file run_benchmark.py.

Referenced by run_benchmark_arrow.benchmark(), and benchmark().

1264 def send_results_output(**kwargs):
1265  """
1266  Send results dataset script output
1267 
1268  Kwargs:
1269  results_dataset_json(str): Json-formatted query results dataset
1270 
1271  Returns:
1272  True(bool): Sending results to output succeeded
1273  """
1274  logging.info("Printing query results to output")
1275  print(kwargs["results_dataset_json"])
1276  return True
1277 


def run_benchmark.validate_query_file (   kwargs)
  Validates query file. Currently only checks the query file name

  Kwargs:
    query_filename(str): Name of query file

  Returns:
    True(bool): Query successfully validated
    False(bool): Query failed validation

Definition at line 419 of file run_benchmark.py.

Referenced by read_query_files().

420 def validate_query_file(**kwargs):
421  """
422  Validates query file. Currently only checks the query file name
423 
424  Kwargs:
425  query_filename(str): Name of query file
426 
427  Returns:
428  True(bool): Query successfully validated
429  False(bool): Query failed validation
430  """
431  if not kwargs["query_filename"].endswith(".sql"):
432  logging.warning(
433  "Query filename "
434  + kwargs["query_filename"]
435  + ' is invalid - does not end in ".sql". Skipping'
436  )
437  return False
438  else:
439  return True
440 


def run_benchmark.validate_setup_teardown_query_file (   kwargs)
  Validates query file. Currently only checks the query file name, and
  checks for setup or teardown in basename

  Kwargs:
    query_filename(str): Name of query file
    check_which(str): either 'setup' or 'teardown'; decides which to
                       check
    quiet(bool): optional, if True, no warning is logged

  Returns:
    True(bool): Query successfully validated
    False(bool): Query failed validation

Definition at line 375 of file run_benchmark.py.

Referenced by read_setup_teardown_query_files().

377  """
378  Validates query file. Currently only checks the query file name, and
379  checks for setup or teardown in basename
380 
381  Kwargs:
382  query_filename(str): Name of query file
383  check_which(str): either 'setup' or 'teardown'; decides which to
384  check
385  quiet(bool): optional, if True, no warning is logged
386 
387  Returns:
388  True(bool): Query successfully validated
389  False(bool): Query failed validation
390  """
391  qfilename = kwargs["query_filename"]
392  basename = os.path.basename(qfilename)
393  check_str = False
394  if kwargs["check_which"] == 'setup':
395  check_str = basename.lower().find('setup') > -1
396  elif kwargs["check_which"] == 'teardown':
397  check_str = basename.lower().find('teardown') > -1
398  else:
399  raise TypeError('Unsupported `check_which` parameter.')
400  return_val = True
401  if not qfilename.endswith(".sql"):
402  logging.warning(
403  "Query filename "
404  + qfilename
405  + ' is invalid - does not end in ".sql". Skipping'
406  )
407  return_val = False
408  elif not check_str:
409  quiet = True if 'quiet' in kwargs and kwargs['quiet'] else False
410  if not quiet:
411  logging.warning(
412  "Query filename "
413  + qfilename
414  + ' does not match "setup" or "teardown". Skipping'
415  )
416  return_val = False
417  return return_val
418 
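Both checks above reduce to a suffix test plus a basename keyword test; a condensed, hypothetical re-statement:

    import os

    def check(query_filename, check_which):
        # Hypothetical condensation of validate_setup_teardown_query_file().
        if check_which not in ("setup", "teardown"):
            raise TypeError("Unsupported `check_which` parameter.")
        basename = os.path.basename(query_filename)
        return (query_filename.endswith(".sql")
                and basename.lower().find(check_which) > -1)

    print(check("queries/setup_create.sql", "setup"))     # True
    print(check("queries/setup_create.sql", "teardown"))  # False
    print(check("queries/notes.txt", "setup"))            # False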

def run_benchmark.verify_destinations (   kwargs)
  Verify script output destination(s)

  Kwargs:
    destinations (list): List of destinations
    dest_db_server (str): DB output destination server
    output_file_json (str): Location of .json file output
    output_file_jenkins (str): Location of .json jenkins file output

  Returns:
    True(bool): Destination(s) is/are valid
    False(bool): Destination(s) is/are not valid

Definition at line 17 of file run_benchmark.py.

Referenced by run_benchmark_arrow.benchmark(), and benchmark().

17 
18 def verify_destinations(**kwargs):
19  """
20  Verify script output destination(s)
21 
22  Kwargs:
23  destinations (list): List of destinations
24  dest_db_server (str): DB output destination server
25  output_file_json (str): Location of .json file output
26  output_file_jenkins (str): Location of .json jenkins file output
27 
28  Returns:
29  True(bool): Destination(s) is/are valid
30  False(bool): Destination(s) is/are not valid
31  """
32  if "mapd_db" in kwargs["destinations"]:
33  valid_destination_set = True
34  if kwargs["dest_db_server"] is None:
35  # If dest_server is not set for mapd_db, then exit
36  logging.error(
37  '"dest_server" is required when destination = "mapd_db"'
38  )
39  if "file_json" in kwargs["destinations"]:
40  valid_destination_set = True
41  if kwargs["output_file_json"] is None:
42  # If output_file_json is not set for file_json, then exit
43  logging.error(
44  '"output_file_json" is required when destination = "file_json"'
45  )
46  if "output" in kwargs["destinations"]:
47  valid_destination_set = True
48  if "jenkins_bench" in kwargs["destinations"]:
49  valid_destination_set = True
50  if kwargs["output_file_jenkins"] is None:
51  # If output_file_jenkins is not set for jenkins_bench, then exit
52  logging.error(
53  '"output_file_jenkins" is required '
54  + 'when destination = "jenkins_bench"'
55  )
56  if not valid_destination_set:
57  return False
58  else:
59  return True
60 
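A condensed standalone re-statement of this check. Note it initializes the validity flag up front: in the listing above, valid_destination_set is only assigned inside the destination-specific branches, so an unrecognized destination list would hit a NameError at line 56. The sketch also returns False when a required argument is missing, which is what the comments in the original say should happen:

    import logging

    def verify(destinations, dest_db_server=None, output_file_json=None,
               output_file_jenkins=None):
        # Map each known destination to the extra argument it requires
        # (True means no extra argument is needed).
        required = {"mapd_db": dest_db_server,
                    "file_json": output_file_json,
                    "jenkins_bench": output_file_jenkins,
                    "output": True}
        valid_destination_set = False
        for dest in destinations:
            if dest in required:
                valid_destination_set = True
                if required[dest] is None:
                    logging.error(
                        'destination "%s" is missing a required argument', dest)
                    return False
        return valid_destination_set

    print(verify(["output"]))                                  # True
    print(verify(["mapd_db"]))                                 # False
    print(verify(["file_json"], output_file_json="res.json"))  # True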