OmniSciDB  6686921089
run_benchmark Namespace Reference

Functions

def verify_destinations
 
def get_connection
 
def get_run_vars
 
def get_gpu_info
 
def get_machine_info
 
def read_query_files
 
def read_setup_teardown_query_files
 
def validate_setup_teardown_query_file
 
def validate_query_file
 
def execute_query
 
def calculate_query_times
 
def clear_memory
 
def clear_system_caches
 
def get_mem_usage
 
def run_query
 
def run_setup_teardown_query
 
def json_format_handler
 
def create_results_dataset
 
def send_results_db
 
def send_results_file_json
 
def send_results_jenkins_bench
 
def send_results_output
 
def process_arguments
 
def benchmark
 

Function Documentation

def run_benchmark.benchmark (   input_arguments)

Definition at line 1603 of file run_benchmark.py.

References create_results_dataset(), get_connection(), get_gpu_info(), get_machine_info(), get_run_vars(), process_arguments(), read_query_files(), read_setup_teardown_query_files(), run_query(), run_setup_teardown_query(), send_results_db(), send_results_file_json(), send_results_jenkins_bench(), send_results_output(), and verify_destinations().

1604 def benchmark(input_arguments):
1605  # Set input args to vars
1606  args = process_arguments(input_arguments)
1607  verbose = args.verbose
1608  quiet = args.quiet
1609  source_db_user = args.user
1610  source_db_passwd = args.passwd
1611  source_db_server = args.server
1612  source_db_port = args.port
1613  source_db_name = args.name
1614  source_table = args.table
1615  label = args.label
1616  queries_dir = args.queries_dir
1617  iterations = args.iterations
1618  gpu_count = args.gpu_count
1619  gpu_name = args.gpu_name
1620  no_gather_conn_gpu_info = args.no_gather_conn_gpu_info
1621  no_gather_nvml_gpu_info = args.no_gather_nvml_gpu_info
1622  gather_nvml_gpu_info = args.gather_nvml_gpu_info
1623  machine_name = args.machine_name
1624  machine_uname = args.machine_uname
1625  destinations = args.destination
1626  dest_db_user = args.dest_user
1627  dest_db_passwd = args.dest_passwd
1628  dest_db_server = args.dest_server
1629  dest_db_port = args.dest_port
1630  dest_db_name = args.dest_name
1631  dest_table = args.dest_table
1632  dest_table_schema_file = args.dest_table_schema_file
1633  output_file_json = args.output_file_json
1634  output_file_jenkins = args.output_file_jenkins
1635  output_tag_jenkins = args.output_tag_jenkins
1636  setup_teardown_queries_dir = args.setup_teardown_queries_dir
1637  run_setup_teardown_per_query = args.run_setup_teardown_per_query
1638  foreign_table_filename = args.foreign_table_filename
1639  jenkins_thresholds_name = args.jenkins_thresholds_name
1640  jenkins_thresholds_field = args.jenkins_thresholds_field
1641  clear_all_memory_pre_query = args.clear_all_memory_pre_query
1642 
1643  # Hard-coded vars
1644  trim = 0.15
1645 
1646  # Set logging output level
1647  if verbose:
1648  logging.basicConfig(level=logging.DEBUG)
1649  elif quiet:
1650  logging.basicConfig(level=logging.WARNING)
1651  else:
1652  logging.basicConfig(level=logging.INFO)
1653 
1654  # Input validation
1655  if iterations <= 1:
1656  # Need > 1 iteration as first iteration is dropped from calculations
1657  logging.error("Iterations must be greater than 1")
1658  exit(1)
1659  if verify_destinations(
1660  destinations=destinations,
1661  dest_db_server=dest_db_server,
1662  output_file_json=output_file_json,
1663  output_file_jenkins=output_file_jenkins,
1664  ):
1665  logging.debug("Destination(s) have been verified.")
1666  else:
1667  logging.error("No valid destination(s) have been set. Exiting.")
1668  exit(1)
1669 
1670  # Establish connection to mapd db
1671  con = get_connection(
1672  db_user=source_db_user,
1673  db_passwd=source_db_passwd,
1674  db_server=source_db_server,
1675  db_port=source_db_port,
1676  db_name=source_db_name,
1677  )
1678  if not con:
1679  exit(1) # Exit if cannot connect to db
1680  # Set run-specific variables (time, uid, etc.)
1681  run_vars = get_run_vars(con=con)
1682  # Set GPU info depending on availability
1683  gpu_info = get_gpu_info(
1684  gpu_name=gpu_name,
1685  no_gather_conn_gpu_info=no_gather_conn_gpu_info,
1686  con=con,
1687  conn_machine_name=run_vars["conn_machine_name"],
1688  no_gather_nvml_gpu_info=no_gather_nvml_gpu_info,
1689  gather_nvml_gpu_info=gather_nvml_gpu_info,
1690  gpu_count=gpu_count,
1691  )
1692  # Set run machine info
1693  machine_info = get_machine_info(
1694  conn_machine_name=run_vars["conn_machine_name"],
1695  machine_name=machine_name,
1696  machine_uname=machine_uname,
1697  )
1698  # Read queries from files, set to queries dir in PWD if not passed in
1699  if not queries_dir:
1700  queries_dir = os.path.join(os.path.dirname(__file__), "queries")
1701  query_list = read_query_files(
1702  queries_dir=queries_dir, source_table=source_table
1703  )
1704  if not query_list:
1705  exit(1)
1706  # Read setup/teardown queries if they exist
1707  setup_query_list, teardown_query_list =\
1708  read_setup_teardown_query_files(queries_dir=setup_teardown_queries_dir,
1709  source_table=source_table,
1710  foreign_table_filename=foreign_table_filename)
1711  # Check at what granularity we want to run setup or teardown queries
1712  run_global_setup_queries = setup_query_list is not None and not run_setup_teardown_per_query
1713  run_per_query_setup_queries = setup_query_list is not None and run_setup_teardown_per_query
1714  run_global_teardown_queries = teardown_query_list is not None and not run_setup_teardown_per_query
1715  run_per_query_teardown_queries = teardown_query_list is not None and run_setup_teardown_per_query
1716  # Run global setup queries if they exist
1717  queries_results = []
1718  st_qr = run_setup_teardown_query(queries=setup_query_list,
1719  do_run=run_global_setup_queries, trim=trim, con=con)
1720  queries_results.extend(st_qr)
1721  # Run queries
1722  for query in query_list["queries"]:
1723  # Run setup queries
1724  st_qr = run_setup_teardown_query(
1725  queries=setup_query_list, do_run=run_per_query_setup_queries, trim=trim, con=con)
1726  queries_results.extend(st_qr)
1727  # Run benchmark query
1728  query_result = run_query(
1729  query=query, iterations=iterations, trim=trim, con=con, clear_all_memory_pre_query=clear_all_memory_pre_query
1730  )
1731  queries_results.append(query_result)
1732  # Run tear-down queries
1733  st_qr = run_setup_teardown_query(
1734  queries=teardown_query_list, do_run=run_per_query_teardown_queries, trim=trim, con=con)
1735  queries_results.extend(st_qr)
1736  logging.info("Completed all queries.")
1737  # Run global tear-down queries if they exist
1738  st_qr = run_setup_teardown_query(queries=teardown_query_list,
1739  do_run=run_global_teardown_queries, trim=trim, con=con)
1740  queries_results.extend(st_qr)
1741  logging.debug("Closing source db connection.")
1742  con.close()
1743  # Generate results dataset
1744  results_dataset = create_results_dataset(
1745  run_guid=run_vars["run_guid"],
1746  run_timestamp=run_vars["run_timestamp"],
1747  run_connection=run_vars["run_connection"],
1748  run_machine_name=machine_info["run_machine_name"],
1749  run_machine_uname=machine_info["run_machine_uname"],
1750  run_driver=run_vars["run_driver"],
1751  run_version=run_vars["run_version"],
1752  run_version_short=run_vars["run_version_short"],
1753  label=label,
1754  source_db_gpu_count=gpu_info["source_db_gpu_count"],
1755  source_db_gpu_driver_ver=gpu_info["source_db_gpu_driver_ver"],
1756  source_db_gpu_name=gpu_info["source_db_gpu_name"],
1757  source_db_gpu_mem=gpu_info["source_db_gpu_mem"],
1758  source_table=source_table,
1759  trim=trim,
1760  iterations=iterations,
1761  query_group=query_list["query_group"],
1762  queries_results=queries_results,
1763  )
1764  results_dataset_json = json.dumps(
1765  results_dataset, default=json_format_handler, indent=2
1766  )
1767  successful_results_dataset = [
1768  x for x in results_dataset if x["succeeded"] is not False
1769  ]
1770  successful_results_dataset_results = []
1771  for results_dataset_entry in successful_results_dataset:
1772  successful_results_dataset_results.append(
1773  results_dataset_entry["results"]
1774  )
1775  # Send results to destination(s)
1776  sent_destination = True
1777  if "mapd_db" in destinations:
1778  if not send_results_db(
1779  results_dataset=successful_results_dataset_results,
1780  table=dest_table,
1781  db_user=dest_db_user,
1782  db_passwd=dest_db_passwd,
1783  db_server=dest_db_server,
1784  db_port=dest_db_port,
1785  db_name=dest_db_name,
1786  table_schema_file=dest_table_schema_file,
1787  ):
1788  sent_destination = False
1789  if "file_json" in destinations:
1790  if not send_results_file_json(
1791  results_dataset_json=results_dataset_json,
1792  output_file_json=output_file_json,
1793  ):
1794  sent_destination = False
1795  if "jenkins_bench" in destinations:
1796  if not send_results_jenkins_bench(
1797  results_dataset=successful_results_dataset_results,
1798  thresholds_name=jenkins_thresholds_name,
1799  thresholds_field=jenkins_thresholds_field,
1800  output_tag_jenkins=output_tag_jenkins,
1801  output_file_jenkins=output_file_jenkins,
1802  ):
1803  sent_destination = False
1804  if "output" in destinations:
1805  if not send_results_output(results_dataset_json=results_dataset_json):
1806  sent_destination = False
1807  if not sent_destination:
1808  logging.error("Sending results to one or more destinations failed")
1809  exit(1)
1810  else:
1811  logging.info(
1812  "Successfully loaded query results info into destination(s)"
1813  )
1814 
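A minimal invocation sketch, assuming an OmniSci server is listening on the default localhost:6274; the table and label values are placeholders:

    import run_benchmark

    # Run every query in the default "queries" dir twice against table
    # "flights" and print the results JSON to stdout instead of loading a DB.
    run_benchmark.benchmark(
        [
            "-t", "flights",   # --table (required)
            "-l", "nightly",   # --label (required)
            "-i", "2",         # --iterations; must be > 1
            "-e", "output",    # --destination: print results JSON to stdout
        ]
    )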


def run_benchmark.calculate_query_times (   kwargs)
  Calculates aggregate query times from all iteration times

  Kwargs:
    total_times(list): List of total time calculations
    execution_times(list): List of execution_time calculations
    results_iter_times(list): List of results_iter_time calculations
    connect_times(list): List of connect_time calculations
    trim(float): Amount to trim from iterations set to gather trimmed
                 values. Enter as a decimal corresponding to the percent to
                 trim - ex: 0.15 to trim 15%.

  Returns:
    query_execution(dict): Query times
    False(bool): The query failed. Exception should be logged.

Definition at line 523 of file run_benchmark.py.

Referenced by create_results_dataset().

524 def calculate_query_times(**kwargs):
525  """
526  Calculates aggregate query times from all iteration times
527 
528  Kwargs:
529  total_times(list): List of total time calculations
530  execution_times(list): List of execution_time calculations
531  results_iter_times(list): List of results_iter_time calculations
532  connect_times(list): List of connect_time calculations
533  trim(float): Amount to trim from iterations set to gather trimmed
534  values. Enter as a decimal corresponding to the percent to
535  trim - ex: 0.15 to trim 15%.
536 
537  Returns:
538  query_execution(dict): Query times
539  False(bool): The query failed. Exception should be logged.
540  """
541  trim_size = int(kwargs["trim"] * len(kwargs["total_times"]))
542  return {
543  "total_time_avg": round(numpy.mean(kwargs["total_times"]), 1),
544  "total_time_min": round(numpy.min(kwargs["total_times"]), 1),
545  "total_time_max": round(numpy.max(kwargs["total_times"]), 1),
546  "total_time_85": round(numpy.percentile(kwargs["total_times"], 85), 1),
547  "total_time_trimmed_avg": round(
548  numpy.mean(
549  numpy.sort(kwargs["total_times"])[trim_size:-trim_size]
550  ),
551  1,
552  )
553  if trim_size
554  else round(numpy.mean(kwargs["total_times"]), 1),
555  "total_times": kwargs["total_times"],
556  "execution_time_avg": round(numpy.mean(kwargs["execution_times"]), 1),
557  "execution_time_min": round(numpy.min(kwargs["execution_times"]), 1),
558  "execution_time_max": round(numpy.max(kwargs["execution_times"]), 1),
559  "execution_time_85": round(
560  numpy.percentile(kwargs["execution_times"], 85), 1
561  ),
562  "execution_time_25": round(
563  numpy.percentile(kwargs["execution_times"], 25), 1
564  ),
565  "execution_time_std": round(numpy.std(kwargs["execution_times"]), 1),
566  "execution_time_trimmed_avg": round(
567  numpy.mean(
568  numpy.sort(kwargs["execution_times"])[trim_size:-trim_size]
569  ), 1
570  )
571  if trim_size > 0
572  else round(numpy.mean(kwargs["execution_times"]), 1),
573  "execution_time_trimmed_max": round(
574  numpy.max(
575  numpy.sort(kwargs["execution_times"])[trim_size:-trim_size]
576  ), 1
577  )
578  if trim_size > 0
579  else round(numpy.max(kwargs["execution_times"]), 1),
580  "execution_times": kwargs["execution_times"],
581  "connect_time_avg": round(numpy.mean(kwargs["connect_times"]), 1),
582  "connect_time_min": round(numpy.min(kwargs["connect_times"]), 1),
583  "connect_time_max": round(numpy.max(kwargs["connect_times"]), 1),
584  "connect_time_85": round(
585  numpy.percentile(kwargs["connect_times"], 85), 1
586  ),
587  "results_iter_time_avg": round(
588  numpy.mean(kwargs["results_iter_times"]), 1
589  ),
590  "results_iter_time_min": round(
591  numpy.min(kwargs["results_iter_times"]), 1
592  ),
593  "results_iter_time_max": round(
594  numpy.max(kwargs["results_iter_times"]), 1
595  ),
596  "results_iter_time_85": round(
597  numpy.percentile(kwargs["results_iter_times"], 85), 1
598  ),
599  }
600 
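For example, with ten iteration times and trim=0.15, trim_size = int(0.15 * 10) = 1, so the trimmed average drops the single smallest and largest values. A standalone sketch of that logic:

    import numpy

    total_times = [10.0, 11.0, 11.5, 12.0, 12.5, 13.0, 13.5, 14.0, 15.0, 40.0]
    trim_size = int(0.15 * len(total_times))  # = 1
    trimmed = numpy.sort(total_times)[trim_size:-trim_size]
    print(round(numpy.mean(trimmed), 1))      # 12.8 -- the 40.0 outlier is dropped
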

def run_benchmark.clear_memory (   kwargs)
  Clears CPU or GPU memory

  Kwargs:
    con(class 'pymapd.connection.Connection'): Mapd connection
    mem_type(str): [gpu, cpu] Type of memory to clear

  Returns:
      None

Definition at line 601 of file run_benchmark.py.

Referenced by run_query().

602 def clear_memory(**kwargs):
603  """
604  Clears CPU or GPU memory
605 
606  Kwargs:
607  con(class 'pymapd.connection.Connection'): Mapd connection
608  mem_type(str): [gpu, cpu] Type of memory to clear
609 
610  Returns:
611  None
612  """
613  try:
614  session = kwargs["con"]._session
615  mem_type = kwargs["mem_type"]
616  if mem_type == 'cpu':
617  kwargs["con"]._client.clear_cpu_memory(session)
618  elif mem_type == 'gpu':
619  kwargs["con"]._client.clear_gpu_memory(session)
620  else:
621  raise TypeError("Unknown mem_type '" + str(mem_type) + "' supplied to 'clear_memory'")
622  except Exception as e:
623  errormessage = "Clear memory failed with error: " + str(e)
624  logging.error(errormessage)
625 
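A usage sketch, assuming con is an established pymapd connection:

    # Drop both memory pools before a cold-start measurement.
    clear_memory(con=con, mem_type="gpu")
    clear_memory(con=con, mem_type="cpu")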


def run_benchmark.clear_system_caches ( )
  Clears system caches

Definition at line 626 of file run_benchmark.py.

Referenced by run_query().

627 def clear_system_caches():
628  """
629  Clears system caches
630  """
631  try:
632  os.system('sudo sh -c "/bin/echo 3 > /proc/sys/vm/drop_caches"')
633  except Exception as e:
634  errormessage = "Clear system caches failed with error: " + str(e)
635  logging.error(errormessage)
636 


def run_benchmark.create_results_dataset (   kwargs)
  Create results dataset

  Kwargs:
    run_guid(str): Run GUID
    run_timestamp(datetime): Run timestamp
    run_connection(str): Connection string
    run_machine_name(str): Run machine name
    run_machine_uname(str): Run machine uname
    run_driver(str): Run driver
    run_version(str): Version of DB
    run_version_short(str): Shortened version of DB
    label(str): Run label
    source_db_gpu_count(int): Number of GPUs on run machine
    source_db_gpu_driver_ver(str): GPU driver version
    source_db_gpu_name(str): GPU name
    source_db_gpu_mem(str): Amount of GPU mem on run machine
    source_table(str): Table to run query against
    trim(float): Trim decimal to remove from top and bottom of results
    iterations(int): Number of iterations of each query to run
    query_group(str): Query group, usually matches table name
    queries_results(list): List of query results dicts:::
        query_name(str): Name of query
        query_mapdql(str): Query to run
        query_id(str): Query ID
        query_succeeded(bool): Query succeeded
        query_error_info(str): Query error info
        result_count(int): Number of results returned
        initial_iteration_results(dict):::
            first_execution_time(float): Execution time for first query
                iteration
            first_connect_time(float):  Connect time for first query
                iteration
            first_results_iter_time(float): Results iteration time for
                first query iteration
            first_total_time(float): Total time for first iteration
            first_cpu_mem_usage(float): CPU memory usage for first query
                iteration
            first_gpu_mem_usage(float): GPU memory usage for first query
                iteration
        noninitial_iteration_results(list):::
            execution_time(float): Time (in ms) that pymapd reports
                backend spent on query.
            connect_time(float): Time (in ms) for overhead of query,
                calculated by subtracting backend execution time from
                time spent on the execution function.
            results_iter_time(float): Time (in ms) it took for
                pymapd.fetchone() to iterate through all of the results.
            total_time(float): Time (in ms) from adding all above times.
        query_total_elapsed_time(int): Total elapsed time for query

  Returns:
    results_dataset(list):::
        result_dataset(dict): Query results dataset

Definition at line 931 of file run_benchmark.py.

References calculate_query_times().

Referenced by benchmark().

932 def create_results_dataset(**kwargs):
933  """
934  Create results dataset
935 
936  Kwargs:
937  run_guid(str): Run GUID
938  run_timestamp(datetime): Run timestamp
939  run_connection(str): Connection string
940  run_machine_name(str): Run machine name
941  run_machine_uname(str): Run machine uname
942  run_driver(str): Run driver
943  run_version(str): Version of DB
944  run_version_short(str): Shortened version of DB
945  label(str): Run label
946  source_db_gpu_count(int): Number of GPUs on run machine
947  source_db_gpu_driver_ver(str): GPU driver version
948  source_db_gpu_name(str): GPU name
949  source_db_gpu_mem(str): Amount of GPU mem on run machine
950  source_table(str): Table to run query against
951  trim(float): Trim decimal to remove from top and bottom of results
952  iterations(int): Number of iterations of each query to run
953  query_group(str): Query group, usually matches table name
954  queries_results(list): List of query results dicts:::
955  query_name(str): Name of query
956  query_mapdql(str): Query to run
957  query_id(str): Query ID
958  query_succeeded(bool): Query succeeded
959  query_error_info(str): Query error info
960  result_count(int): Number of results returned
961  initial_iteration_results(dict):::
962  first_execution_time(float): Execution time for first query
963  iteration
964  first_connect_time(float): Connect time for first query
965  iteration
966  first_results_iter_time(float): Results iteration time for
967  first query iteration
968  first_total_time(float): Total time for first iteration
969  first_cpu_mem_usage(float): CPU memory usage for first query
970  iteration
971  first_gpu_mem_usage(float): GPU memory usage for first query
972  iteration
973  noninitial_iteration_results(list):::
974  execution_time(float): Time (in ms) that pymapd reports
975  backend spent on query.
976  connect_time(float): Time (in ms) for overhead of query,
977  calculated by subtracting backend execution time from
978  time spent on the execution function.
979  results_iter_time(float): Time (in ms) it took for
980  pymapd.fetchone() to iterate through all of the results.
981  total_time(float): Time (in ms) from adding all above times.
982  query_total_elapsed_time(int): Total elapsed time for query
983 
984  Returns:
985  results_dataset(list):::
986  result_dataset(dict): Query results dataset
987  """
988  results_dataset = []
989  for query_results in kwargs["queries_results"]:
990  if query_results["query_succeeded"]:
991  # Aggregate iteration values
992  execution_times, connect_times, results_iter_times, total_times = (
993  [],
994  [],
995  [],
996  [],
997  )
998  detailed_timing_last_iteration = {}
999  if len(query_results["noninitial_iteration_results"]) == 0:
1000  # A single query run (most likely a setup or teardown query)
1001  initial_result = query_results["initial_iteration_results"]
1002  execution_times.append(initial_result["first_execution_time"])
1003  connect_times.append(initial_result["first_connect_time"])
1004  results_iter_times.append(
1005  initial_result["first_results_iter_time"]
1006  )
1007  total_times.append(initial_result["first_total_time"])
1008  # Special case
1009  result_count = 1
1010  else:
1011  # More than one query run
1012  for noninitial_result in query_results[
1013  "noninitial_iteration_results"
1014  ]:
1015  execution_times.append(noninitial_result["execution_time"])
1016  connect_times.append(noninitial_result["connect_time"])
1017  results_iter_times.append(
1018  noninitial_result["results_iter_time"]
1019  )
1020  total_times.append(noninitial_result["total_time"])
1021  # Overwrite result count, same for each iteration
1022  result_count = noninitial_result["result_count"]
1023 
1024  # If available, getting the last iteration's component-wise timing information as a json structure
1025  if (
1026  query_results["noninitial_iteration_results"][-1]["debug_info"]
1027  ):
1028  detailed_timing_last_iteration = json.loads(
1029  query_results["noninitial_iteration_results"][-1][
1030  "debug_info"
1031  ]
1032  )["timer"]
1033  # Calculate query times
1034  logging.debug(
1035  "Calculating times from query " + query_results["query_id"]
1036  )
1037  query_times = calculate_query_times(
1038  total_times=total_times,
1039  execution_times=execution_times,
1040  connect_times=connect_times,
1041  results_iter_times=results_iter_times,
1042  trim=kwargs[
1043  "trim"
1044  ], # Trim top and bottom n% for trimmed calculations
1045  )
1046  result_dataset = {
1047  "name": query_results["query_name"],
1048  "mapdql": query_results["query_mapdql"],
1049  "succeeded": True,
1050  "results": {
1051  "run_guid": kwargs["run_guid"],
1052  "run_timestamp": kwargs["run_timestamp"],
1053  "run_connection": kwargs["run_connection"],
1054  "run_machine_name": kwargs["run_machine_name"],
1055  "run_machine_uname": kwargs["run_machine_uname"],
1056  "run_driver": kwargs["run_driver"],
1057  "run_version": kwargs["run_version"],
1058  "run_version_short": kwargs["run_version_short"],
1059  "run_label": kwargs["label"],
1060  "run_gpu_count": kwargs["source_db_gpu_count"],
1061  "run_gpu_driver_ver": kwargs["source_db_gpu_driver_ver"],
1062  "run_gpu_name": kwargs["source_db_gpu_name"],
1063  "run_gpu_mem_mb": kwargs["source_db_gpu_mem"],
1064  "run_table": kwargs["source_table"],
1065  "query_group": kwargs["query_group"],
1066  "query_id": query_results["query_id"],
1067  "query_result_set_count": result_count,
1068  "query_error_info": query_results["query_error_info"],
1069  "query_conn_first": query_results[
1070  "initial_iteration_results"
1071  ]["first_connect_time"],
1072  "query_conn_avg": query_times["connect_time_avg"],
1073  "query_conn_min": query_times["connect_time_min"],
1074  "query_conn_max": query_times["connect_time_max"],
1075  "query_conn_85": query_times["connect_time_85"],
1076  "query_exec_first": query_results[
1077  "initial_iteration_results"
1078  ]["first_execution_time"],
1079  "query_exec_avg": query_times["execution_time_avg"],
1080  "query_exec_min": query_times["execution_time_min"],
1081  "query_exec_max": query_times["execution_time_max"],
1082  "query_exec_85": query_times["execution_time_85"],
1083  "query_exec_25": query_times["execution_time_25"],
1084  "query_exec_stdd": query_times["execution_time_std"],
1085  "query_exec_trimmed_avg": query_times[
1086  "execution_time_trimmed_avg"
1087  ],
1088  "query_exec_trimmed_max": query_times[
1089  "execution_time_trimmed_max"
1090  ],
1091  # Render queries not supported yet
1092  "query_render_first": None,
1093  "query_render_avg": None,
1094  "query_render_min": None,
1095  "query_render_max": None,
1096  "query_render_85": None,
1097  "query_render_25": None,
1098  "query_render_stdd": None,
1099  "query_total_first": query_results[
1100  "initial_iteration_results"
1101  ]["first_total_time"],
1102  "query_total_avg": query_times["total_time_avg"],
1103  "query_total_min": query_times["total_time_min"],
1104  "query_total_max": query_times["total_time_max"],
1105  "query_total_85": query_times["total_time_85"],
1106  "query_total_all": query_results[
1107  "query_total_elapsed_time"
1108  ],
1109  "query_total_trimmed_avg": query_times[
1110  "total_time_trimmed_avg"
1111  ],
1112  "results_iter_count": kwargs["iterations"],
1113  "results_iter_first": query_results[
1114  "initial_iteration_results"
1115  ]["first_results_iter_time"],
1116  "results_iter_avg": query_times["results_iter_time_avg"],
1117  "results_iter_min": query_times["results_iter_time_min"],
1118  "results_iter_max": query_times["results_iter_time_max"],
1119  "results_iter_85": query_times["results_iter_time_85"],
1120  "cpu_mem_usage_mb": query_results[
1121  "initial_iteration_results"
1122  ]["first_cpu_mem_usage"],
1123  "gpu_mem_usage_mb": query_results[
1124  "initial_iteration_results"
1125  ]["first_gpu_mem_usage"],
1126  },
1127  "debug": {
1128  "query_exec_times": query_times["execution_times"],
1129  "query_total_times": query_times["total_times"],
1130  "detailed_timing_last_iteration": detailed_timing_last_iteration,
1131  },
1132  }
1133  elif not query_results["query_succeeded"]:
1134  result_dataset = {
1135  "name": query_results["query_name"],
1136  "mapdql": query_results["query_mapdql"],
1137  "succeeded": False,
1138  }
1139  results_dataset.append(result_dataset)
1140  logging.debug("All values set for query " + query_results["query_id"])
1141  return results_dataset
1142 

def run_benchmark.execute_query (   kwargs)
  Executes a query against the connected db using pymapd
  https://pymapd.readthedocs.io/en/latest/usage.html#querying

  Kwargs:
    query_name(str): Name of query
    query_mapdql(str): Query to run
    iteration(int): Iteration number
    con(class): Connection class

  Returns:
    query_execution(dict):::
      result_count(int): Number of results returned
      execution_time(float): Time (in ms) that pymapd reports
                             backend spent on query.
      connect_time(float): Time (in ms) for overhead of query, calculated
                           by subtracting backend execution time
                           from time spent on the execution function.
      results_iter_time(float): Time (in ms) it took for
                                pymapd.fetchone() to iterate through all
                                of the results.
      total_time(float): Time (in ms) from adding all above times.
    False(bool): The query failed. Exception should be logged.

Definition at line 441 of file run_benchmark.py.

Referenced by run_query().

442 def execute_query(**kwargs):
443  """
444  Executes a query against the connected db using pymapd
445  https://pymapd.readthedocs.io/en/latest/usage.html#querying
446 
447  Kwargs:
448  query_name(str): Name of query
449  query_mapdql(str): Query to run
450  iteration(int): Iteration number
451  con(class): Connection class
452 
453  Returns:
454  query_execution(dict):::
455  result_count(int): Number of results returned
456  execution_time(float): Time (in ms) that pymapd reports
457  backend spent on query.
458  connect_time(float): Time (in ms) for overhead of query, calculated
459  by subtracting backend execution time
460  from time spent on the execution function.
461  results_iter_time(float): Time (in ms) it took for
462  pymapd.fetchone() to iterate through all
463  of the results.
464  total_time(float): Time (in ms) from adding all above times.
465  False(bool): The query failed. Exception should be logged.
466  """
467  start_time = timeit.default_timer()
468  try:
469  # Run the query
470  query_result = kwargs["con"].execute(kwargs["query_mapdql"])
471  logging.debug(
472  "Completed iteration "
473  + str(kwargs["iteration"])
474  + " of query "
475  + kwargs["query_name"]
476  )
477  except (pymapd.exceptions.ProgrammingError, pymapd.exceptions.Error):
478  logging.exception(
479  "Error running query "
480  + kwargs["query_name"]
481  + " during iteration "
482  + str(kwargs["iteration"])
483  )
484  return False
485 
486  # Calculate times
487  query_elapsed_time = (timeit.default_timer() - start_time) * 1000
488  execution_time = query_result._result.execution_time_ms
489  debug_info = query_result._result.debug
490  connect_time = round((query_elapsed_time - execution_time), 1)
491  # Iterate through each result from the query
492  logging.debug(
493  "Counting results from query "
494  + kwargs["query_name"]
495  + " iteration "
496  + str(kwargs["iteration"])
497  )
498  result_count = 0
499  start_time = timeit.default_timer()
500  while query_result.fetchone():
501  result_count += 1
502  results_iter_time = round(
503  ((timeit.default_timer() - start_time) * 1000), 1
504  )
505  query_execution = {
506  "result_count": result_count,
507  "execution_time": execution_time,
508  "connect_time": connect_time,
509  "results_iter_time": results_iter_time,
510  "total_time": execution_time + connect_time + results_iter_time,
511  "debug_info": debug_info,
512  }
513  logging.debug(
514  "Execution results for query "
515  + kwargs["query_name"]
516  + " iteration "
517  + str(kwargs["iteration"])
518  + ": "
519  + str(query_execution)
520  )
521  return query_execution
522 
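The reported fields relate to each other as follows; a sketch of the arithmetic with dummy values (in ms), not the function itself:

    query_elapsed_time = 125.0  # wall time spent in con.execute()
    execution_time = 100.0      # backend-reported execution_time_ms
    results_iter_time = 5.0     # wall time of the fetchone() loop

    connect_time = round(query_elapsed_time - execution_time, 1)    # 25.0
    total_time = execution_time + connect_time + results_iter_time  # 130.0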


def run_benchmark.get_connection (   kwargs)
  Connects to the db using pymapd
  https://pymapd.readthedocs.io/en/latest/usage.html#connecting

  Kwargs:
    db_user(str): DB username
    db_passwd(str): DB password
    db_server(str): DB host
    db_port(int): DB port
    db_name(str): DB name

  Returns:
    con(class): Connection class
    False(bool): The connection failed. Exception should be logged.

Definition at line 61 of file run_benchmark.py.

Referenced by run_benchmark_arrow.benchmark(), benchmark(), and send_results_db().

61 
62 def get_connection(**kwargs):
63  """
64  Connects to the db using pymapd
65  https://pymapd.readthedocs.io/en/latest/usage.html#connecting
66 
67  Kwargs:
68  db_user(str): DB username
69  db_passwd(str): DB password
70  db_server(str): DB host
71  db_port(int): DB port
72  db_name(str): DB name
73 
74  Returns:
75  con(class): Connection class
76  False(bool): The connection failed. Exception should be logged.
77  """
78  try:
79  logging.debug("Connecting to mapd db...")
80  con = pymapd.connect(
81  user=kwargs["db_user"],
82  password=kwargs["db_passwd"],
83  host=kwargs["db_server"],
84  port=kwargs["db_port"],
85  dbname=kwargs["db_name"],
86  )
87  logging.info("Successfully connected to mapd db")
88  return con
89  except (pymapd.exceptions.OperationalError, pymapd.exceptions.Error):
90  logging.exception("Error connecting to database.")
91  return False
92 
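A usage sketch with the script's default connection parameters:

    con = get_connection(
        db_user="mapd",
        db_passwd="HyperInteractive",
        db_server="localhost",
        db_port=6274,
        db_name="mapd",
    )
    if not con:
        exit(1)  # get_connection returns False on failure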


def run_benchmark.get_gpu_info (   kwargs)
  Gets run machine GPU info

  Kwargs:
    gpu_name(str): GPU name from input param
    no_gather_conn_gpu_info(bool): Do not gather GPU info fields from the connection
    con(class 'pymapd.connection.Connection'): Mapd connection
    conn_machine_name(str): Name of run machine
    no_gather_nvml_gpu_info(bool): Do not gather GPU info using nvml
    gather_nvml_gpu_info(bool): Gather GPU info using nvml
    gpu_count(int): Number of GPUs on run machine

  Returns:
    gpu_info(dict):::
        conn_gpu_count(int): Number of GPUs gathered from pymapd con
        source_db_gpu_count(int): Number of GPUs on run machine
        source_db_gpu_mem(str): Amount of GPU mem on run machine
        source_db_gpu_driver_ver(str): GPU driver version
        source_db_gpu_name(str): GPU name

Definition at line 134 of file run_benchmark.py.

Referenced by run_benchmark_arrow.benchmark(), and benchmark().

135 def get_gpu_info(**kwargs):
136  """
137  Gets run machine GPU info
138 
139  Kwargs:
140  gpu_name(str): GPU name from input param
141  no_gather_conn_gpu_info(bool): Do not gather GPU info fields from the connection
142  con(class 'pymapd.connection.Connection'): Mapd connection
143  conn_machine_name(str): Name of run machine
144  no_gather_nvml_gpu_info(bool): Do not gather GPU info using nvml
145  gather_nvml_gpu_info(bool): Gather GPU info using nvml
146  gpu_count(int): Number of GPUs on run machine
147 
148  Returns:
149  gpu_info(dict):::
150  conn_gpu_count(int): Number of GPUs gathered from pymapd con
151  source_db_gpu_count(int): Number of GPUs on run machine
152  source_db_gpu_mem(str): Amount of GPU mem on run machine
153  source_db_gpu_driver_ver(str): GPU driver version
154  source_db_gpu_name(str): GPU name
155  """
156  # Set GPU info fields
157  conn_gpu_count = None
158  source_db_gpu_count = None
159  source_db_gpu_mem = None
160  source_db_gpu_driver_ver = ""
161  source_db_gpu_name = ""
162  if kwargs["no_gather_conn_gpu_info"]:
163  logging.debug(
164  "--no-gather-conn-gpu-info passed, "
165  + "using blank values for source database GPU info fields "
166  + "[run_gpu_count, run_gpu_mem_mb] "
167  )
168  else:
169  logging.debug(
170  "Gathering source database GPU info fields "
171  + "[run_gpu_count, run_gpu_mem_mb] "
172  + "using pymapd connection info. "
173  )
174  conn_hardware_info = kwargs["con"]._client.get_hardware_info(
175  kwargs["con"]._session
176  )
177  conn_gpu_count = conn_hardware_info.hardware_info[0].num_gpu_allocated
178  if conn_gpu_count == 0 or conn_gpu_count is None:
179  no_gather_nvml_gpu_info = True
180  if conn_gpu_count == 0:
181  logging.warning(
182  "0 GPUs detected from connection info, "
183  + "using blank values for source database GPU info fields "
184  + "If running against cpu-only server, make sure to set "
185  + "--no-gather-nvml-gpu-info and --no-gather-conn-gpu-info."
186  )
187  else:
188  no_gather_nvml_gpu_info = kwargs["no_gather_nvml_gpu_info"]
189  source_db_gpu_count = conn_gpu_count
190  try:
191  source_db_gpu_mem = int(
192  conn_hardware_info.hardware_info[0].gpu_info[0].memory
193  / 1000000
194  )
195  except IndexError:
196  logging.error("GPU memory info not available from connection.")
197  if no_gather_nvml_gpu_info:
198  logging.debug(
199  "--no-gather-nvml-gpu-info passed, "
200  + "using blank values for source database GPU info fields "
201  + "[gpu_driver_ver, run_gpu_name] "
202  )
203  elif (
204  kwargs["conn_machine_name"] == "localhost"
205  or kwargs["gather_nvml_gpu_info"]
206  ):
207  logging.debug(
208  "Gathering source database GPU info fields "
209  + "[gpu_driver_ver, run_gpu_name] "
210  + "from local GPU using pynvml. "
211  )
212  import pynvml
213 
214  pynvml.nvmlInit()
215  source_db_gpu_driver_ver = pynvml.nvmlSystemGetDriverVersion().decode()
216  for i in range(source_db_gpu_count):
217  handle = pynvml.nvmlDeviceGetHandleByIndex(i)
218  # Assume all cards are the same, overwrite name value
219  source_db_gpu_name = pynvml.nvmlDeviceGetName(handle).decode()
220  pynvml.nvmlShutdown()
221  # If gpu_count argument passed in, override gathered value
222  if kwargs["gpu_count"]:
223  source_db_gpu_count = kwargs["gpu_count"]
224  if kwargs["gpu_name"]:
225  source_db_gpu_name = kwargs["gpu_name"]
226  gpu_info = {
227  "conn_gpu_count": conn_gpu_count,
228  "source_db_gpu_count": source_db_gpu_count,
229  "source_db_gpu_mem": source_db_gpu_mem,
230  "source_db_gpu_driver_ver": source_db_gpu_driver_ver,
231  "source_db_gpu_name": source_db_gpu_name,
232  }
233  return gpu_info
234 
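A sketch of the call benchmark() makes, assuming con and run_vars already exist; gather_nvml_gpu_info=True forces local pynvml gathering even when the server is not localhost:

    gpu_info = get_gpu_info(
        gpu_name="",                  # let pynvml fill the name in
        no_gather_conn_gpu_info=False,
        con=con,
        conn_machine_name=run_vars["conn_machine_name"],
        no_gather_nvml_gpu_info=False,
        gather_nvml_gpu_info=True,
        gpu_count=None,               # use the count reported by the server
    )
    print(gpu_info["source_db_gpu_name"], gpu_info["source_db_gpu_mem"])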


def run_benchmark.get_machine_info (   kwargs)
  Gets run machine info

  Kwargs:
    conn_machine_name(str): Name of machine from pymapd con
    machine_name(str): Name of machine if passed in
    machine_uname(str): Uname of machine if passed in

  Returns:
    machine_info(dict):::
        run_machine_name(str): Run machine name
        run_machine_uname(str): Run machine uname

Definition at line 235 of file run_benchmark.py.


Referenced by run_benchmark_arrow.benchmark(), and benchmark().

236 def get_machine_info(**kwargs):
237  """
238  Gets run machine info
239 
240  Kwargs:
241  conn_machine_name(str): Name of machine from pymapd con
242  machine_name(str): Name of machine if passed in
243  machine_uname(str): Uname of machine if passed in
244 
245  Returns:
246  machine_info(dict):::
247  run_machine_name(str): Run machine name
248  run_machine_uname(str): Run machine uname
249  """
250  # Set machine names, using local info if connected to localhost
251  if kwargs["conn_machine_name"] == "localhost":
252  local_uname = os.uname()
253  # If --machine-name passed in, override pymapd con value
254  if kwargs["machine_name"]:
255  run_machine_name = kwargs["machine_name"]
256  else:
257  if kwargs["conn_machine_name"] == "localhost":
258  run_machine_name = local_uname.nodename.split(".")[0]
259  else:
260  run_machine_name = kwargs["conn_machine_name"]
261  # If --machine-uname passed in, override pymapd con value
262  if kwargs["machine_uname"]:
263  run_machine_uname = kwargs["machine_uname"]
264  else:
265  if kwargs["conn_machine_name"] == "localhost":
266  run_machine_uname = " ".join(local_uname)
267  else:
268  run_machine_uname = ""
269  machine_info = {
270  "run_machine_name": run_machine_name,
271  "run_machine_uname": run_machine_uname,
272  }
273  return machine_info
274 
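On a Unix host the two localhost-derived values reduce to os.uname() calls; a standalone sketch:

    import os

    local_uname = os.uname()
    print(local_uname.nodename.split(".")[0])  # short hostname, e.g. "bench01"
    print(" ".join(local_uname))               # full uname string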


def run_benchmark.get_mem_usage (   kwargs)
  Calculates memory statistics from mapd_server _client.get_memory call

  Kwargs:
    con(class 'pymapd.connection.Connection'): Mapd connection
    mem_type(str): [gpu, cpu] Type of memory to gather metrics for

  Returns:
    ramusage(dict):::
      usedram(float): Amount of memory (in MB) used
      freeram(float): Amount of memory (in MB) free
      totalallocated(float): Total amount of memory (in MB) allocated
      errormessage(str): Error if returned by get_memory call
      rawdata(list): Raw data returned from get_memory call

Definition at line 637 of file run_benchmark.py.

Referenced by run_benchmark_arrow.run_query(), and run_query().

638 def get_mem_usage(**kwargs):
639  """
640  Calculates memory statistics from mapd_server _client.get_memory call
641 
642  Kwargs:
643  con(class 'pymapd.connection.Connection'): Mapd connection
644  mem_type(str): [gpu, cpu] Type of memory to gather metrics for
645 
646  Returns:
647  ramusage(dict):::
648  usedram(float): Amount of memory (in MB) used
649  freeram(float): Amount of memory (in MB) free
650  totalallocated(float): Total amount of memory (in MB) allocated
651  errormessage(str): Error if returned by get_memory call
652  rawdata(list): Raw data returned from get_memory call
653  """
654  try:
655  con_mem_data_list = kwargs["con"]._client.get_memory(
656  session=kwargs["con"]._session, memory_level=kwargs["mem_type"]
657  )
658  usedram = 0
659  freeram = 0
660  for con_mem_data in con_mem_data_list:
661  page_size = con_mem_data.page_size
662  node_memory_data_list = con_mem_data.node_memory_data
663  for node_memory_data in node_memory_data_list:
664  ram = node_memory_data.num_pages * page_size
665  is_free = node_memory_data.is_free
666  if is_free:
667  freeram += ram
668  else:
669  usedram += ram
670  totalallocated = usedram + freeram
671  if totalallocated > 0:
672  totalallocated = round(totalallocated / 1024 / 1024, 1)
673  usedram = round(usedram / 1024 / 1024, 1)
674  freeram = round(freeram / 1024 / 1024, 1)
675  ramusage = {}
676  ramusage["usedram"] = usedram
677  ramusage["freeram"] = freeram
678  ramusage["totalallocated"] = totalallocated
679  ramusage["errormessage"] = ""
680  except Exception as e:
681  errormessage = "Get memory failed with error: " + str(e)
682  logging.error(errormessage)
683  ramusage = {"errormessage": errormessage}  # ramusage may not exist yet if the call failed early
684  return ramusage
685 
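The page arithmetic reduces to num_pages * page_size bytes per memory segment, converted to MB. A worked example with dummy values:

    page_size = 512          # bytes per page, as reported by get_memory
    num_pages = 4_194_304    # pages in one node_memory_data entry
    ram_mb = round(num_pages * page_size / 1024 / 1024, 1)
    print(ram_mb)            # 2048.0 -> a 2 GB segment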


def run_benchmark.get_run_vars (   kwargs)
  Gets/sets run-specific vars such as time, uid, etc.

  Kwargs:
    con(class 'pymapd.connection.Connection'): Mapd connection

  Returns:
    run_vars(dict):::
        run_guid(str): Run GUID
        run_timestamp(datetime): Run timestamp
        run_connection(str): Connection string
        run_driver(str): Run driver
        run_version(str): Version of DB
        run_version_short(str): Shortened version of DB
        conn_machine_name(str): Name of run machine

Definition at line 93 of file run_benchmark.py.

Referenced by run_benchmark_arrow.benchmark(), and benchmark().

93 
94 def get_run_vars(**kwargs):
95  """
96  Gets/sets run-specific vars such as time, uid, etc.
97 
98  Kwargs:
99  con(class 'pymapd.connection.Connection'): Mapd connection
100 
101  Returns:
102  run_vars(dict):::
103  run_guid(str): Run GUID
104  run_timestamp(datetime): Run timestamp
105  run_connection(str): Connection string
106  run_driver(str): Run driver
107  run_version(str): Version of DB
108  run_version_short(str): Shortened version of DB
109  conn_machine_name(str): Name of run machine
110  """
111  run_guid = str(uuid.uuid4())
112  logging.debug("Run guid: " + run_guid)
113  run_timestamp = datetime.datetime.now()
114  run_connection = str(kwargs["con"])
115  logging.debug("Connection string: " + run_connection)
116  run_driver = "" # TODO
117  run_version = kwargs["con"]._client.get_version()
118  if "-" in run_version:
119  run_version_short = run_version.split("-")[0]
120  else:
121  run_version_short = run_version
122  conn_machine_name = re.search(r"@(.*?):", run_connection).group(1)
123  run_vars = {
124  "run_guid": run_guid,
125  "run_timestamp": run_timestamp,
126  "run_connection": run_connection,
127  "run_driver": run_driver,
128  "run_version": run_version,
129  "run_version_short": run_version_short,
130  "conn_machine_name": conn_machine_name,
131  }
132  return run_vars
133 
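conn_machine_name is pulled out of the connection string with a regex; a sketch using a hypothetical str(con) format (only the "@host:" portion matters):

    import re

    run_connection = "mapd://mapd:***@localhost:6274/mapd"  # hypothetical format
    conn_machine_name = re.search(r"@(.*?):", run_connection).group(1)
    print(conn_machine_name)  # localhost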


def run_benchmark.json_format_handler (   x)

Definition at line 922 of file run_benchmark.py.

923 def json_format_handler(x):
924  # Function to allow json to deal with datetime and numpy int
925  if isinstance(x, datetime.datetime):
926  return x.isoformat()
927  if isinstance(x, numpy.int64):
928  return int(x)
929  raise TypeError("Unknown type")
930 
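benchmark() passes this handler to json.dumps() as the default hook, which is how datetime and numpy.int64 values in the results dataset get serialized. For example, with json_format_handler as defined above:

    import datetime
    import json
    import numpy

    sample = {"run_timestamp": datetime.datetime.now(), "count": numpy.int64(42)}
    print(json.dumps(sample, default=json_format_handler, indent=2))
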
def run_benchmark.process_arguments (   input_arguments)

Definition at line 1325 of file run_benchmark.py.

Referenced by benchmark().

1326 def process_arguments(input_arguments):
1327  # Parse input parameters
1328  parser = ArgumentParser()
1329  optional = parser._action_groups.pop()
1330  required = parser.add_argument_group("required arguments")
1331  parser._action_groups.append(optional)
1332  optional.add_argument(
1333  "-v", "--verbose", action="store_true", help="Turn on debug logging"
1334  )
1335  optional.add_argument(
1336  "-q",
1337  "--quiet",
1338  action="store_true",
1339  help="Suppress script output " + "(except warnings and errors)",
1340  )
1341  required.add_argument(
1342  "-u",
1343  "--user",
1344  dest="user",
1345  default="mapd",
1346  help="Source database user",
1347  )
1348  required.add_argument(
1349  "-p",
1350  "--passwd",
1351  dest="passwd",
1352  default="HyperInteractive",
1353  help="Source database password",
1354  )
1355  required.add_argument(
1356  "-s",
1357  "--server",
1358  dest="server",
1359  default="localhost",
1360  help="Source database server hostname",
1361  )
1362  optional.add_argument(
1363  "-o",
1364  "--port",
1365  dest="port",
1366  type=int,
1367  default=6274,
1368  help="Source database server port",
1369  )
1370  required.add_argument(
1371  "-n",
1372  "--name",
1373  dest="name",
1374  default="mapd",
1375  help="Source database name",
1376  )
1377  required.add_argument(
1378  "-t",
1379  "--table",
1380  dest="table",
1381  required=True,
1382  help="Source db table name",
1383  )
1384  required.add_argument(
1385  "-l",
1386  "--label",
1387  dest="label",
1388  required=True,
1389  help="Benchmark run label",
1390  )
1391  required.add_argument(
1392  "-d",
1393  "--queries-dir",
1394  dest="queries_dir",
1395  help='Absolute path to dir with query files. \
1396  [Default: "queries" dir in same location as script]',
1397  )
1398  required.add_argument(
1399  "-i",
1400  "--iterations",
1401  dest="iterations",
1402  type=int,
1403  required=True,
1404  help="Number of iterations per query. Must be > 1",
1405  )
1406  optional.add_argument(
1407  "-g",
1408  "--gpu-count",
1409  dest="gpu_count",
1410  type=int,
1411  default=None,
1412  help="Number of GPUs. Not required when gathering local gpu info",
1413  )
1414  optional.add_argument(
1415  "-G",
1416  "--gpu-name",
1417  dest="gpu_name",
1418  type=str,
1419  default="",
1420  help="Name of GPU(s). Not required when gathering local gpu info",
1421  )
1422  optional.add_argument(
1423  "--no-gather-conn-gpu-info",
1424  dest="no_gather_conn_gpu_info",
1425  action="store_true",
1426  help="Do not gather source database GPU info fields "
1427  + "[run_gpu_count, run_gpu_mem_mb] "
1428  + "using pymapd connection info. "
1429  + "Use when testing a CPU-only server.",
1430  )
1431  optional.add_argument(
1432  "--no-gather-nvml-gpu-info",
1433  dest="no_gather_nvml_gpu_info",
1434  action="store_true",
1435  help="Do not gather source database GPU info fields "
1436  + "[gpu_driver_ver, run_gpu_name] "
1437  + "from local GPU using pynvml. "
1438  + 'Defaults to True when source server is not "localhost". '
1439  + "Use when testing a CPU-only server.",
1440  )
1441  optional.add_argument(
1442  "--gather-nvml-gpu-info",
1443  dest="gather_nvml_gpu_info",
1444  action="store_true",
1445  help="Gather source database GPU info fields "
1446  + "[gpu_driver_ver, run_gpu_name] "
1447  + "from local GPU using pynvml. "
1448  + 'Defaults to True when source server is "localhost". '
1449  + "Only use when benchmarking against same machine that this script "
1450  + "is run from.",
1451  )
1452  optional.add_argument(
1453  "-m",
1454  "--machine-name",
1455  dest="machine_name",
1456  help="Name of source machine",
1457  )
1458  optional.add_argument(
1459  "-a",
1460  "--machine-uname",
1461  dest="machine_uname",
1462  help="Uname info from " + "source machine",
1463  )
1464  optional.add_argument(
1465  "-e",
1466  "--destination",
1467  dest="destination",
1468  default="mapd_db",
1469  help="Destination type: [mapd_db, file_json, output, jenkins_bench] "
1470  + "Multiple values can be input separated by commas, "
1471  + 'ex: "mapd_db,file_json"',
1472  )
1473  optional.add_argument(
1474  "-U",
1475  "--dest-user",
1476  dest="dest_user",
1477  default="mapd",
1478  help="Destination mapd_db database user",
1479  )
1480  optional.add_argument(
1481  "-P",
1482  "--dest-passwd",
1483  dest="dest_passwd",
1484  default="HyperInteractive",
1485  help="Destination mapd_db database password",
1486  )
1487  optional.add_argument(
1488  "-S",
1489  "--dest-server",
1490  dest="dest_server",
1491  help="Destination mapd_db database server hostname"
1492  + ' (required if destination = "mapd_db")',
1493  )
1494  optional.add_argument(
1495  "-O",
1496  "--dest-port",
1497  dest="dest_port",
1498  type=int,
1499  default=6274,
1500  help="Destination mapd_db database server port",
1501  )
1502  optional.add_argument(
1503  "-N",
1504  "--dest-name",
1505  dest="dest_name",
1506  default="mapd",
1507  help="Destination mapd_db database name",
1508  )
1509  optional.add_argument(
1510  "-T",
1511  "--dest-table",
1512  dest="dest_table",
1513  default="results",
1514  help="Destination mapd_db table name",
1515  )
1516  optional.add_argument(
1517  "-C",
1518  "--dest-table-schema-file",
1519  dest="dest_table_schema_file",
1520  default="results_table_schemas/query-results.sql",
1521  help="Destination table schema file. This must be an executable "
1522  + "CREATE TABLE statement that matches the output of this script. It "
1523  + "is required when creating the results table. Default location is "
1524  + 'in "./results_table_schemas/query-results.sql"',
1525  )
1526  optional.add_argument(
1527  "-j",
1528  "--output-file-json",
1529  dest="output_file_json",
1530  help="Absolute path of .json output file "
1531  + '(required if destination = "file_json")',
1532  )
1533  optional.add_argument(
1534  "-J",
1535  "--output-file-jenkins",
1536  dest="output_file_jenkins",
1537  help="Absolute path of jenkins benchmark .json output file "
1538  + '(required if destination = "jenkins_bench")',
1539  )
1540  optional.add_argument(
1541  "-E",
1542  "--output-tag-jenkins",
1543  dest="output_tag_jenkins",
1544  default="",
1545  help="Jenkins benchmark result tag. "
1546  + 'Optional, appended to table name in "group" field',
1547  )
1548  optional.add_argument(
1549  "--setup-teardown-queries-dir",
1550  dest="setup_teardown_queries_dir",
1551  type=str,
1552  default=None,
1553  help='Absolute path to dir with setup & teardown query files. '
1554  'Query files with "setup" in the filename will be executed in '
1555  'the setup stage, likewise query files with "teardown" in '
1556  'the filename will be executed in the tear-down stage. Queries '
1557  'execute in lexical order. [Default: None, meaning this option is '
1558  'not used.]',
1559  )
1560  optional.add_argument(
1561  "--clear-all-memory-pre-query",
1562  dest="clear_all_memory_pre_query",
1563  action="store_true",
1564  help='Clear gpu & cpu memory before every query.'
1565  ' [Default: False]'
1566  )
1567  optional.add_argument(
1568  "--run-setup-teardown-per-query",
1569  dest="run_setup_teardown_per_query",
1570  action="store_true",
1571  help='Run setup & teardown steps per query. '
1572  'If set, setup-teardown-queries-dir must be specified. '
1573  'If not set, but setup-teardown-queries-dir is specified '
1574  'setup & tear-down queries will run globally, that is, '
1575  'once per script invocation.'
1576  ' [Default: False]'
1577  )
1578  optional.add_argument(
1579  "-F",
1580  "--foreign-table-filename",
1581  dest="foreign_table_filename",
1582  default=None,
1583  help="Path to file containing template for import query. "
1584  "Path must be relative to the FOREIGN SERVER. "
1585  "Occurrences of \"##FILE##\" within setup/teardown queries will be"
1586  " replaced with this. "
1587  )
1588  optional.add_argument(
1589  "--jenkins-thresholds-name",
1590  dest="jenkins_thresholds_name",
1591  default="average",
1592  help="Name of Jenkins output field.",
1593  )
1594  optional.add_argument(
1595  "--jenkins-thresholds-field",
1596  dest="jenkins_thresholds_field",
1597  default="query_exec_trimmed_avg",
1598  help="Field to report as jenkins output value.",
1599  )
1600  args = parser.parse_args(args=input_arguments)
1601  return args
1602 
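A usage sketch showing just the required flags; every other option falls back to its default:

    args = process_arguments(["-t", "flights", "-l", "nightly", "-i", "5"])
    print(args.user, args.server, args.port)  # mapd localhost 6274
    print(args.destination)                   # mapd_db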


def run_benchmark.read_query_files (   kwargs)
  Reads query files from a directory and builds the query list

  Kwargs:
    queries_dir(str): Directory with query files
    source_table(str): Table to run query against

  Returns:
    query_list(dict):::
        query_group(str): Query group, usually matches table name
        queries(list)
            query(dict):::
                name(str): Name of query
                mapdql(str): Query syntax to run
    False(bool): Unable to find queries dir

Definition at line 275 of file run_benchmark.py.

References validate_query_file().

Referenced by run_benchmark_arrow.benchmark(), benchmark(), and read_setup_teardown_query_files().

276 def read_query_files(**kwargs):
277  """
278  Reads query files from a directory and builds the query list
279 
280  Kwargs:
281  queries_dir(str): Directory with query files
282  source_table(str): Table to run query against
283 
284  Returns:
285  query_list(dict):::
286  query_group(str): Query group, usually matches table name
287  queries(list)
288  query(dict):::
289  name(str): Name of query
290  mapdql(str): Query syntax to run
291  False(bool): Unable to find queries dir
292  """
293  # Read query files contents and write to query_list
294  query_list = {"query_group": "", "queries": []}
295  query_group = kwargs["queries_dir"].split("/")[-1]
296  query_list.update(query_group=query_group)
297  logging.debug("Queries dir: " + kwargs["queries_dir"])
298  try:
299  for query_filename in sorted(os.listdir(kwargs["queries_dir"])):
300  logging.debug("Validating query filename: " + query_filename)
301  if validate_query_file(query_filename=query_filename):
302  with open(
303  kwargs["queries_dir"] + "/" + query_filename, "r"
304  ) as query_filepath:
305  logging.debug(
306  "Reading query with filename: " + query_filename
307  )
308  query_mapdql = query_filepath.read().replace("\n", " ")
309  query_mapdql = query_mapdql.replace(
310  "##TAB##", kwargs["source_table"]
311  )
312  query_list["queries"].append(
313  {"name": query_filename, "mapdql": query_mapdql}
314  )
315  logging.info("Read all query files")
316  return query_list
317  except FileNotFoundError:
318  logging.exception("Could not find queries directory.")
319  return False
320 
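Each query file is read into a single line of SQL with ##TAB## replaced by the source table; a sketch of the substitution with hypothetical file contents:

    query_mapdql = "SELECT COUNT(*)\nFROM ##TAB##;"  # raw file contents
    query_mapdql = query_mapdql.replace("\n", " ")
    query_mapdql = query_mapdql.replace("##TAB##", "flights")
    print(query_mapdql)  # SELECT COUNT(*) FROM flights;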


def run_benchmark.read_setup_teardown_query_files (   kwargs)
  Get queries to run for setup and teardown from directory

  Kwargs:
    queries_dir(str): Directory with query files
    source_table(str): Table to run query against
    foreign_table_filename(str): File to create foreign table from

  Returns:
    setup_queries(query_list): List of setup queries
    teardown_queries(query_list): List of teardown queries
    False(bool): Unable to find queries dir

query_list is described by:
query_list(dict):::
    query_group(str): Query group, usually matches table name
    queries(list)
        query(dict):::
            name(str): Name of query
            mapdql(str): Query syntax to run

Definition at line 321 of file run_benchmark.py.

References read_query_files(), and validate_setup_teardown_query_file().

Referenced by benchmark().

322 def read_setup_teardown_query_files(**kwargs):
323  """
324  Get queries to run for setup and teardown from directory
325 
326  Kwargs:
327  queries_dir(str): Directory with query files
328  source_table(str): Table to run query against
329  foreign_table_filename(str): File to create foreign table from
330 
331  Returns:
332  setup_queries(query_list): List of setup queries
333  teardown_queries(query_list): List of teardown queries
334  False(bool): Unable to find queries dir
335 
336  query_list is described by:
337  query_list(dict):::
338  query_group(str): Query group, usually matches table name
339  queries(list)
340  query(dict):::
341  name(str): Name of query
342  mapdql(str): Query syntax to run
343  """
344  setup_teardown_queries_dir = kwargs['queries_dir']
345  source_table = kwargs['source_table']
346  # Read setup/tear-down queries if they exist
347  setup_teardown_query_list = None
348  if setup_teardown_queries_dir is not None:
349  setup_teardown_query_list = read_query_files(
350  queries_dir=setup_teardown_queries_dir,
351  source_table=source_table
352  )
353  if kwargs["foreign_table_filename"] is not None:
354  for query in setup_teardown_query_list['queries']:
355  query['mapdql'] = query['mapdql'].replace(
356  "##FILE##", kwargs["foreign_table_filename"])
357  # Filter setup queries
358  setup_query_list = None
359  if setup_teardown_query_list is not None:
360  setup_query_list = filter(
361  lambda x: validate_setup_teardown_query_file(
362  query_filename=x['name'], check_which='setup', quiet=True),
363  setup_teardown_query_list['queries'])
364  setup_query_list = list(setup_query_list)
365  # Filter teardown queries
366  teardown_query_list = None
367  if setup_teardown_query_list is not None:
368  teardown_query_list = filter(
369  lambda x: validate_setup_teardown_query_file(
370  query_filename=x['name'], check_which='teardown', quiet=True),
371  setup_teardown_query_list['queries'])
372  teardown_query_list = list(teardown_query_list)
373  return setup_query_list, teardown_query_list
374 
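A minimal usage sketch, assuming the directory and table names below (both illustrative):

    from run_benchmark import read_setup_teardown_query_files

    setup_queries, teardown_queries = read_setup_teardown_query_files(
        queries_dir="queries/setup_teardown",  # hypothetical directory
        source_table="flights",                # hypothetical table
        foreign_table_filename=None,
    )

Files whose basenames contain "setup" land in the first list and those containing "teardown" in the second; both are None when no setup/teardown directory is supplied.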

def run_benchmark.run_query (   kwargs)
  Takes query name, syntax, and iteration count and calls the
    execute_query function for each iteration. Reports total, iteration,
    and exec timings, memory usage, and failure status.

  Kwargs:
    query(dict):::
        name(str): Name of query
        mapdql(str): Query syntax to run
    iterations(int): Number of iterations of each query to run
    trim(float): Trim decimal to remove from top and bottom of results
    con(class 'pymapd.connection.Connection'): Mapd connection
    clear_all_memory_pre_query(bool,optional): Flag to determine if memory is cleared
    between query runs

  Returns:
    query_results(dict):::
        query_name(str): Name of query
        query_mapdql(str): Query to run
        query_id(str): Query ID
        query_succeeded(bool): Query succeeded
        query_error_info(str): Query error info
        result_count(int): Number of results returned
        initial_iteration_results(dict):::
            first_execution_time(float): Execution time for first query
                iteration
            first_connect_time(float):  Connect time for first query
                iteration
            first_results_iter_time(float): Results iteration time for
                first query iteration
            first_total_time(float): Total time for first iteration
            first_cpu_mem_usage(float): CPU memory usage for first query
                iteration
            first_gpu_mem_usage(float): GPU memory usage for first query
                iteration
        noninitial_iteration_results(list):::
            execution_time(float): Time (in ms) that pymapd reports
                backend spent on query.
            connect_time(float): Time (in ms) for overhead of query,
                calculated by subtracting backend execution time from
                time spent on the execution function.
            results_iter_time(float): Time (in ms) it took for
                pymapd.fetchone() to iterate through all of the results.
            total_time(float): Sum (in ms) of the above times.
        query_total_elapsed_time(int): Total elapsed time for query
    On failure, query_succeeded is set to False and timing results
        are omitted. The exception should be logged.

Definition at line 686 of file run_benchmark.py.

References clear_memory(), clear_system_caches(), execute_query(), and get_mem_usage().

Referenced by benchmark(), and run_setup_teardown_query().

687 def run_query(**kwargs):
688  """
689  Takes query name, syntax, and iteration count and calls the
690  execute_query function for each iteration. Reports total, iteration,
691  and exec timings, memory usage, and failure status.
692 
693  Kwargs:
694  query(dict):::
695  name(str): Name of query
696  mapdql(str): Query syntax to run
697  iterations(int): Number of iterations of each query to run
698  trim(float): Trim decimal to remove from top and bottom of results
699  con(class 'pymapd.connection.Connection'): Mapd connection
700  clear_all_memory_pre_query(bool,optional): Flag to determine if memory is cleared
701  between query runs
702 
703  Returns:
704  query_results(dict):::
705  query_name(str): Name of query
706  query_mapdql(str): Query to run
707  query_id(str): Query ID
708  query_succeeded(bool): Query succeeded
709  query_error_info(str): Query error info
710  result_count(int): Number of results returned
711  initial_iteration_results(dict):::
712  first_execution_time(float): Execution time for first query
713  iteration
714  first_connect_time(float): Connect time for first query
715  iteration
716  first_results_iter_time(float): Results iteration time for
717  first query iteration
718  first_total_time(float): Total time for first iteration
719  first_cpu_mem_usage(float): CPU memory usage for first query
720  iteration
721  first_gpu_mem_usage(float): GPU memory usage for first query
722  iteration
723  noninitial_iteration_results(list):::
724  execution_time(float): Time (in ms) that pymapd reports
725  backend spent on query.
726  connect_time(float): Time (in ms) for overhead of query,
727  calculated by subtracting backend execution time from
728  time spent on the execution function.
 729  results_iter_time(float): Time (in ms) it took for
 730  pymapd.fetchone() to iterate through all of the results.
 731  total_time(float): Sum (in ms) of the above times.
732  query_total_elapsed_time(int): Total elapsed time for query
 733  On failure, query_succeeded is set to False and timing results
 734  are omitted. The exception should be logged.
734  """
735  logging.info(
736  "Running query: "
737  + kwargs["query"]["name"]
738  + " iterations: "
739  + str(kwargs["iterations"])
740  )
741  query_id = kwargs["query"]["name"].rsplit(".")[
742  0
 743  ] # Query ID = filename without extension
744  query_results = {
745  "query_name": kwargs["query"]["name"],
746  "query_mapdql": kwargs["query"]["mapdql"],
747  "query_id": query_id,
748  "query_succeeded": True,
749  "query_error_info": "",
750  "initial_iteration_results": {},
751  "noninitial_iteration_results": [],
752  "query_total_elapsed_time": 0,
753  }
754  query_total_start_time = timeit.default_timer()
755  # Run iterations of query
756  for iteration in range(kwargs["iterations"]):
757  # Gather memory before running query iteration
758  logging.debug("Getting pre-query memory usage on CPU")
759  pre_query_cpu_mem_usage = get_mem_usage(
760  con=kwargs["con"], mem_type="cpu"
761  )
762  logging.debug("Getting pre-query memory usage on GPU")
763  pre_query_gpu_mem_usage = get_mem_usage(
764  con=kwargs["con"], mem_type="gpu"
765  )
766  if "clear_all_memory_pre_query" in kwargs and kwargs["clear_all_memory_pre_query"]:
767  # Clear GPU & CPU memory
768  clear_memory(
769  con=kwargs["con"], mem_type="cpu"
770  )
771  clear_memory(
772  con=kwargs["con"], mem_type="gpu"
773  )
 774  clear_system_caches()
 775  # Run query iteration
776  logging.debug(
777  "Running iteration "
778  + str(iteration)
779  + " of query "
780  + kwargs["query"]["name"]
781  )
782  query_result = execute_query(
783  query_name=kwargs["query"]["name"],
784  query_mapdql=kwargs["query"]["mapdql"],
785  iteration=iteration,
786  con=kwargs["con"],
787  )
788  # Gather memory after running query iteration
789  logging.debug("Getting post-query memory usage on CPU")
790  post_query_cpu_mem_usage = get_mem_usage(
791  con=kwargs["con"], mem_type="cpu"
792  )
793  logging.debug("Getting post-query memory usage on GPU")
794  post_query_gpu_mem_usage = get_mem_usage(
795  con=kwargs["con"], mem_type="gpu"
796  )
797  # Calculate total (post minus pre) memory usage after query iteration
798  query_cpu_mem_usage = round(
799  post_query_cpu_mem_usage["usedram"]
800  - pre_query_cpu_mem_usage["usedram"],
801  1,
802  )
803  query_gpu_mem_usage = round(
804  post_query_gpu_mem_usage["usedram"]
805  - pre_query_gpu_mem_usage["usedram"],
806  1,
807  )
808  if query_result:
809  query_results.update(
810  query_error_info="" # TODO - interpret query error info
811  )
812  # Assign first query iteration times
813  if iteration == 0:
814  first_execution_time = round(query_result["execution_time"], 1)
815  first_connect_time = round(query_result["connect_time"], 1)
816  first_results_iter_time = round(
817  query_result["results_iter_time"], 1
818  )
819  first_total_time = (
820  first_execution_time
821  + first_connect_time
822  + first_results_iter_time
823  )
824  query_results.update(
825  initial_iteration_results={
826  "first_execution_time": first_execution_time,
827  "first_connect_time": first_connect_time,
828  "first_results_iter_time": first_results_iter_time,
829  "first_total_time": first_total_time,
830  "first_cpu_mem_usage": query_cpu_mem_usage,
831  "first_gpu_mem_usage": query_gpu_mem_usage,
832  }
833  )
834  else:
835  # Put noninitial iterations into query_result list
836  query_results["noninitial_iteration_results"].append(
837  query_result
838  )
839  # Verify no change in memory for noninitial iterations
840  if query_cpu_mem_usage != 0.0:
841  logging.error(
842  (
843  "Noninitial iteration ({0}) of query ({1}) "
844  + "shows non-zero CPU memory usage: {2}"
845  ).format(
846  iteration,
847  kwargs["query"]["name"],
848  query_cpu_mem_usage,
849  )
850  )
851  if query_gpu_mem_usage != 0.0:
852  logging.error(
853  (
854  "Noninitial iteration ({0}) of query ({1}) "
855  + "shows non-zero GPU memory usage: {2}"
856  ).format(
857  iteration,
858  kwargs["query"]["name"],
859  query_gpu_mem_usage,
860  )
861  )
862  else:
863  logging.warning(
864  "Error detected during execution of query: "
865  + kwargs["query"]["name"]
866  + ". This query will be skipped and "
 867  + "times will not be reported"
868  )
869  query_results.update(query_succeeded=False)
870  break
871  # Calculate time for all iterations to run
872  query_total_elapsed_time = round(
873  ((timeit.default_timer() - query_total_start_time) * 1000), 1
874  )
875  query_results.update(query_total_elapsed_time=query_total_elapsed_time)
876  logging.info(
877  "Completed all iterations of query " + kwargs["query"]["name"]
878  )
879  return query_results
880 
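A usage sketch, assuming con is an open pymapd connection and a query dict of the shape documented above (query name and syntax are illustrative):

    from run_benchmark import run_query

    query = {"name": "query_1.sql",
             "mapdql": "SELECT COUNT(*) FROM flights;"}
    results = run_query(query=query, iterations=10, trim=0.15, con=con)
    if results["query_succeeded"]:
        print(results["query_total_elapsed_time"])

Iteration 0 is reported separately under initial_iteration_results so that warm-up effects do not skew the noninitial timings.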

def run_benchmark.run_setup_teardown_query (   kwargs)
    Convenience wrapper around `run_query` to run a setup or 
    teardown query

  Kwargs:
    queries(query_list): List of queries to run
    do_run(bool): If true will run query, otherwise do nothing
    trim(float): Trim decimal to remove from top and bottom of results
    con(class 'pymapd.connection.Connection'): Mapd connection

  Returns:
    query_results(list): List of `run_query` result dicts (see
        `run_query` for their shape)

    query_list is described by:
    queries(list)
        query(dict):::
            name(str): Name of query
            mapdql(str): Query syntax to run
            [setup : queries(list)]
            [teardown : queries(list)]

Definition at line 881 of file run_benchmark.py.

References run_query().

Referenced by benchmark().

882 def run_setup_teardown_query(**kwargs):
883  """
884  Convenience wrapper around `run_query` to run a setup or
885  teardown query
886 
887  Kwargs:
888  queries(query_list): List of queries to run
889  do_run(bool): If true will run query, otherwise do nothing
890  trim(float): Trim decimal to remove from top and bottom of results
891  con(class 'pymapd.connection.Connection'): Mapd connection
892 
893  Returns:
 894  query_results(list): List of `run_query` result dicts (see
 895  `run_query` for their shape)
895 
896  query_list is described by:
897  queries(list)
898  query(dict):::
899  name(str): Name of query
900  mapdql(str): Query syntax to run
901  [setup : queries(list)]
902  [teardown : queries(list)]
903  """
904  query_results = list()
905  if kwargs['do_run']:
906  for query in kwargs['queries']:
907  result = run_query(
908  query=query, iterations=1,
909  trim=kwargs['trim'],
910  con=kwargs['con']
911  )
912  if not result['query_succeeded']:
913  logging.warning(
 914  "Setup or teardown query "
 915  + query["name"]
 916  + " did not complete."
917  )
918  else:
919  query_results.append(result)
920  return query_results
921 
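A usage sketch, assuming setup_queries came from read_setup_teardown_query_files() and con is an open pymapd connection:

    from run_benchmark import run_setup_teardown_query

    setup_results = run_setup_teardown_query(
        queries=setup_queries,
        do_run=setup_queries is not None,
        trim=0.15,
        con=con,
    )

With do_run=False the call is a no-op that returns an empty list.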

def run_benchmark.send_results_db (   kwargs)
  Send results dataset to a database using pymapd

  Kwargs:
    results_dataset(list):::
        result_dataset(dict): Query results dataset
    table(str): Results destination table name
    db_user(str): Results destination user name
    db_passwd(str): Results destination password
    db_server(str): Results destination server address
    db_port(int): Results destination server port
    db_name(str): Results destination database name
    table_schema_file(str): Path to destination database schema file

  Returns:
    True(bool): Sending results to destination database succeeded
    False(bool): Sending results to destination database failed. Exception
        should be logged.

Definition at line 1143 of file run_benchmark.py.

References get_connection().

Referenced by run_benchmark_arrow.benchmark(), and benchmark().

1144 def send_results_db(**kwargs):
1145  """
1146  Send results dataset to a database using pymapd
1147 
1148  Kwargs:
1149  results_dataset(list):::
1150  result_dataset(dict): Query results dataset
1151  table(str): Results destination table name
1152  db_user(str): Results destination user name
1153  db_passwd(str): Results destination password
1154  db_server(str): Results destination server address
1155  db_port(int): Results destination server port
1156  db_name(str): Results destination database name
1157  table_schema_file(str): Path to destination database schema file
1158 
1159  Returns:
1160  True(bool): Sending results to destination database succeeded
1161  False(bool): Sending results to destination database failed. Exception
1162  should be logged.
1163  """
1164  # Create dataframe from list of query results
1165  logging.debug("Converting results list to pandas dataframe")
1166  results_df = DataFrame(kwargs["results_dataset"])
1167  # Establish connection to destination db
1168  logging.debug("Connecting to destination db")
1169  dest_con = get_connection(
1170  db_user=kwargs["db_user"],
1171  db_passwd=kwargs["db_passwd"],
1172  db_server=kwargs["db_server"],
1173  db_port=kwargs["db_port"],
1174  db_name=kwargs["db_name"],
1175  )
1176  if not dest_con:
1177  logging.exception("Could not connect to destination db.")
1178  return False
1179  # Load results into db, creating table if it does not exist
1180  tables = dest_con.get_tables()
1181  if kwargs["table"] not in tables:
1182  logging.info("Destination table does not exist. Creating.")
1183  try:
1184  with open(kwargs["table_schema_file"], "r") as table_schema:
1185  logging.debug(
1186  "Reading table_schema_file: " + kwargs["table_schema_file"]
1187  )
1188  create_table_sql = table_schema.read().replace("\n", " ")
1189  create_table_sql = create_table_sql.replace(
1190  "##TAB##", kwargs["table"]
1191  )
1192  except FileNotFoundError:
1193  logging.exception("Could not find destination table_schema_file.")
1194  return False
1195  try:
1196  logging.debug("Executing create destination table query")
1197  dest_con.execute(create_table_sql)
1198  logging.debug("Destination table created.")
1199  except (pymapd.exceptions.ProgrammingError, pymapd.exceptions.Error):
1200  logging.exception("Error running destination table creation")
1201  return False
1202  logging.info("Loading results into destination db")
1203  try:
1204  dest_con.load_table_columnar(
1205  kwargs["table"],
1206  results_df,
1207  preserve_index=False,
1208  chunk_size_bytes=0,
1209  col_names_from_schema=True,
1210  )
1211  except (pymapd.exceptions.ProgrammingError, pymapd.exceptions.Error):
1212  logging.exception("Error loading results into destination db")
1213  dest_con.close()
1214  return False
1215  dest_con.close()
1216  return True
1217 
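A usage sketch; every connection parameter below is a placeholder:

    from run_benchmark import send_results_db

    sent = send_results_db(
        results_dataset=results_dataset,         # list of result dicts
        table="benchmark_results",               # hypothetical table
        db_user="admin",                         # placeholder credentials
        db_passwd="password",
        db_server="localhost",
        db_port=6274,
        db_name="omnisci",
        table_schema_file="results_schema.sql",  # hypothetical schema file
    )

The schema file is read only when the destination table does not already exist.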

def run_benchmark.send_results_file_json (   kwargs)
  Send results dataset to a local json file

  Kwargs:
    results_dataset_json(str): Json-formatted query results dataset
    output_file_json (str): Location of .json file output

  Returns:
    True(bool): Sending results to json file succeeded
    False(bool): Sending results to json file failed. Exception
        should be logged.

Definition at line 1218 of file run_benchmark.py.

Referenced by run_benchmark_arrow.benchmark(), and benchmark().

1219 def send_results_file_json(**kwargs):
1220  """
1221  Send results dataset to a local json file
1222 
1223  Kwargs:
1224  results_dataset_json(str): Json-formatted query results dataset
1225  output_file_json (str): Location of .json file output
1226 
1227  Returns:
1228  True(bool): Sending results to json file succeeded
1229  False(bool): Sending results to json file failed. Exception
1230  should be logged.
1231  """
1232  try:
1233  logging.debug("Opening json output file for writing")
1234  with open(kwargs["output_file_json"], "w") as file_json_open:
1235  logging.info(
1236  "Writing to output json file: " + kwargs["output_file_json"]
1237  )
1238  file_json_open.write(kwargs["results_dataset_json"])
1239  return True
1240  except IOError:
1241  logging.exception("Error writing results to json output file")
1242  return False
1243 
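A usage sketch (the output path is illustrative and results_dataset is assumed to exist):

    import json
    from run_benchmark import send_results_file_json

    ok = send_results_file_json(
        results_dataset_json=json.dumps(results_dataset, default=str),
        output_file_json="/tmp/benchmark_results.json",  # hypothetical path
    )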

def run_benchmark.send_results_jenkins_bench (   kwargs)
  Send results dataset to a local json file formatted for use with jenkins
    benchmark plugin: https://github.com/jenkinsci/benchmark-plugin

  Kwargs:
    results_dataset(list):::
        result_dataset(dict): Query results dataset
    thresholds_name(str): Name to use for Jenkins result field
    thresholds_field(str): Field to use for query threshold in jenkins
    output_tag_jenkins(str): Jenkins benchmark result tag, for different
        sets from same table
    output_file_jenkins (str): Location of .json jenkins file output

  Returns:
    True(bool): Sending results to json file succeeded
    False(bool): Sending results to json file failed. Exception
        should be logged.

Definition at line 1244 of file run_benchmark.py.

Referenced by run_benchmark_arrow.benchmark(), and benchmark().

1245 def send_results_jenkins_bench(**kwargs):
1246  """
1247  Send results dataset to a local json file formatted for use with jenkins
1248  benchmark plugin: https://github.com/jenkinsci/benchmark-plugin
1249 
1250  Kwargs:
1251  results_dataset(list):::
1252  result_dataset(dict): Query results dataset
1253  thresholds_name(str): Name to use for Jenkins result field
1254  thresholds_field(str): Field to use for query threshold in jenkins
1255  output_tag_jenkins(str): Jenkins benchmark result tag, for different
1256  sets from same table
1257  output_file_jenkins (str): Location of .json jenkins file output
1258 
1259  Returns:
1260  True(bool): Sending results to json file succeeded
1261  False(bool): Sending results to json file failed. Exception
1262  should be logged.
1263  """
1264  jenkins_bench_results = []
1265  for result_dataset in kwargs["results_dataset"]:
1266  logging.debug("Constructing output for jenkins benchmark plugin")
1267  jenkins_bench_results.append(
1268  {
1269  "name": result_dataset["query_id"],
1270  "description": "",
1271  "parameters": [],
1272  "results": [
1273  {
1274  "name": result_dataset["query_id"]
1275  + "_"
1276  + kwargs["thresholds_name"],
1277  "description": "",
1278  "unit": "ms",
1279  "dblValue": result_dataset[kwargs["thresholds_field"]],
1280  }
1281  ],
1282  }
1283  )
1284  jenkins_bench_json = json.dumps(
1285  {
1286  "groups": [
1287  {
1288  "name": result_dataset["run_table"]
1289  + kwargs["output_tag_jenkins"],
1290  "description": "Source table: "
1291  + result_dataset["run_table"],
1292  "tests": jenkins_bench_results,
1293  }
1294  ]
1295  }
1296  )
1297  try:
1298  logging.debug("Opening jenkins_bench json output file for writing")
1299  with open(kwargs["output_file_jenkins"], "w") as file_jenkins_open:
1300  logging.info(
1301  "Writing to jenkins_bench json file: "
1302  + kwargs["output_file_jenkins"]
1303  )
1304  file_jenkins_open.write(jenkins_bench_json)
1305  return True
1306  except IOError:
1307  logging.exception("Error writing results to jenkins json output file")
1308  return False
1309 
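For orientation, the emitted file has roughly the following shape (all names and values are illustrative):

    {
      "groups": [
        {
          "name": "flights_mytag",
          "description": "Source table: flights",
          "tests": [
            {
              "name": "query_1",
              "description": "",
              "parameters": [],
              "results": [
                {"name": "query_1_average", "description": "",
                 "unit": "ms", "dblValue": 123.4}
              ]
            }
          ]
        }
      ]
    }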

def run_benchmark.send_results_output (   kwargs)
  Send results dataset to script output

  Kwargs:
    results_dataset_json(str): Json-formatted query results dataset

  Returns:
    True(bool): Sending results to output succeeded

Definition at line 1310 of file run_benchmark.py.

Referenced by run_benchmark_arrow.benchmark(), and benchmark().

1311 def send_results_output(**kwargs):
1312  """
 1313  Send results dataset to script output
1314 
1315  Kwargs:
1316  results_dataset_json(str): Json-formatted query results dataset
1317 
1318  Returns:
1319  True(bool): Sending results to output succeeded
1320  """
1321  logging.info("Printing query results to output")
1322  print(kwargs["results_dataset_json"])
1323  return True
1324 


def run_benchmark.validate_query_file (   kwargs)
  Validates query file. Currently only checks the query file name

  Kwargs:
    query_filename(str): Name of query file

  Returns:
    True(bool): Query successfully validated
    False(bool): Query failed validation

Definition at line 419 of file run_benchmark.py.

Referenced by read_query_files().

420 def validate_query_file(**kwargs):
421  """
422  Validates query file. Currently only checks the query file name
423 
424  Kwargs:
425  query_filename(str): Name of query file
426 
427  Returns:
 428  True(bool): Query successfully validated
429  False(bool): Query failed validation
430  """
431  if not kwargs["query_filename"].endswith(".sql"):
432  logging.warning(
433  "Query filename "
434  + kwargs["query_filename"]
435  + ' is invalid - does not end in ".sql". Skipping'
436  )
437  return False
438  else:
439  return True
440 
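For example (filenames are illustrative):

    from run_benchmark import validate_query_file

    validate_query_file(query_filename="query_1.sql")  # returns True
    validate_query_file(query_filename="query_1.txt")  # returns False, logs a warning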


def run_benchmark.validate_setup_teardown_query_file (   kwargs)
  Validates query file. Currently only checks the query file name, and
  checks for setup or teardown in basename

  Kwargs:
    query_filename(str): Name of query file
    check_which(str): either 'setup' or 'teardown', decides which to
                       check
    quiet(bool): optional, if True, no warning is logged

  Returns:
    True(bool): Query successfully validated
    False(bool): Query failed validation

Definition at line 375 of file run_benchmark.py.

Referenced by read_setup_teardown_query_files().

 375 
 376 def validate_setup_teardown_query_file(**kwargs):
 377  """
378  Validates query file. Currently only checks the query file name, and
379  checks for setup or teardown in basename
380 
381  Kwargs:
382  query_filename(str): Name of query file
 383  check_which(str): either 'setup' or 'teardown', decides which to
384  check
385  quiet(bool): optional, if True, no warning is logged
386 
387  Returns:
 388  True(bool): Query successfully validated
389  False(bool): Query failed validation
390  """
391  qfilename = kwargs["query_filename"]
392  basename = os.path.basename(qfilename)
393  check_str = False
394  if kwargs["check_which"] == 'setup':
395  check_str = basename.lower().find('setup') > -1
396  elif kwargs["check_which"] == 'teardown':
397  check_str = basename.lower().find('teardown') > -1
398  else:
399  raise TypeError('Unsupported `check_which` parameter.')
400  return_val = True
401  if not qfilename.endswith(".sql"):
402  logging.warning(
403  "Query filename "
404  + qfilename
405  + ' is invalid - does not end in ".sql". Skipping'
406  )
407  return_val = False
408  elif not check_str:
409  quiet = True if 'quiet' in kwargs and kwargs['quiet'] else False
410  if not quiet:
411  logging.warning(
412  "Query filename "
413  + qfilename
414  + ' does not match "setup" or "teardown". Skipping'
415  )
416  return_val = False
417  return return_val
418 
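For example (filenames are illustrative):

    from run_benchmark import validate_setup_teardown_query_file

    validate_setup_teardown_query_file(
        query_filename="setup_flights.sql", check_which="setup")        # returns True
    validate_setup_teardown_query_file(
        query_filename="query_1.sql", check_which="setup", quiet=True)  # returns False, no warning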

def run_benchmark.verify_destinations (   kwargs)
  Verify script output destination(s)

  Kwargs:
    destinations (list): List of destinations
    dest_db_server (str): DB output destination server
    output_file_json (str): Location of .json file output
    output_file_jenkins (str): Location of .json jenkins file output

  Returns:
    True(bool): Destination(s) is/are valid
    False(bool): Destination(s) is/are not valid

Definition at line 17 of file run_benchmark.py.

Referenced by run_benchmark_arrow.benchmark(), and benchmark().

17 
18 def verify_destinations(**kwargs):
19  """
20  Verify script output destination(s)
21 
22  Kwargs:
23  destinations (list): List of destinations
24  dest_db_server (str): DB output destination server
25  output_file_json (str): Location of .json file output
26  output_file_jenkins (str): Location of .json jenkins file output
27 
28  Returns:
29  True(bool): Destination(s) is/are valid
30  False(bool): Destination(s) is/are not valid
31  """
 32  # Initialize so an unknown destination cannot leave this name undefined
 33  valid_destination_set = False
 34  if "mapd_db" in kwargs["destinations"]:
 35  valid_destination_set = True
 36  if kwargs["dest_db_server"] is None:
 37  # dest_server is required when destination = "mapd_db"; fail validation
 38  logging.error(
 39  '"dest_server" is required when destination = "mapd_db"'
 40  )
 41  return False
 42  if "file_json" in kwargs["destinations"]:
 43  valid_destination_set = True
 44  if kwargs["output_file_json"] is None:
 45  # output_file_json is required when destination = "file_json"; fail validation
 46  logging.error(
 47  '"output_file_json" is required when destination = "file_json"'
 48  )
 49  return False
 50  if "output" in kwargs["destinations"]:
 51  valid_destination_set = True
 52  if "jenkins_bench" in kwargs["destinations"]:
 53  valid_destination_set = True
 54  if kwargs["output_file_jenkins"] is None:
 55  # output_file_jenkins is required when destination = "jenkins_bench"; fail validation
 56  logging.error(
 57  '"output_file_jenkins" is required '
 58  + 'when destination = "jenkins_bench"'
 59  )
 60  return False
 61  return valid_destination_set
 62 
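A usage sketch (all values are placeholders):

    from run_benchmark import verify_destinations

    ok = verify_destinations(
        destinations=["mapd_db", "file_json"],
        dest_db_server="localhost",
        output_file_json="/tmp/results.json",
        output_file_jenkins=None,
    )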