OmniSciDB  b24e664e58
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
StringTransform.cpp File Reference
#include "StringTransform.h"
#include <numeric>
#include <random>
#include <regex>
+ Include dependency graph for StringTransform.cpp:

Go to the source code of this file.

Functions

void apply_shim (std::string &result, const boost::regex &reg_expr, const std::function< void(std::string &, const boost::smatch &)> &shim_fn)
 
std::vector< std::pair< size_t,
size_t > > 
find_string_literals (const std::string &query)
 
std::string hide_sensitive_data_from_query (std::string const &query_str)
 
ssize_t inside_string_literal (const size_t start, const size_t length, const std::vector< std::pair< size_t, size_t >> &literal_positions)
 
template<>
std::string to_string (char const *&&v)
 
template<>
std::string to_string (std::string &&v)
 
std::string generate_random_string (const size_t len)
 
std::vector< std::string > split (const std::string &str, const std::string &delim)
 split apart a string into a vector of substrings More...
 
std::string strip (const std::string &str)
 trim any whitespace from the left and right ends of a string More...
 
bool remove_unquoted_newlines_linefeeds_and_tabs_from_sql_string (std::string &str) noexcept
 sanitize an SQL string More...
 

Function Documentation

void apply_shim ( std::string &  result,
const boost::regex &  reg_expr,
const std::function< void(std::string &, const boost::smatch &)> &  shim_fn 
)

Definition at line 23 of file StringTransform.cpp.

References find_string_literals(), and inside_string_literal().

Referenced by MapDHandler::apply_copy_to_shim(), and anonymous_namespace{CalciteAdapter.cpp}::pg_shim_impl().

25  {
26  boost::smatch what;
27  std::vector<std::pair<size_t, size_t>> lit_pos = find_string_literals(result);
28  auto start_it = result.cbegin();
29  auto end_it = result.cend();
30  while (true) {
31  if (!boost::regex_search(start_it, end_it, what, reg_expr)) {
32  break;
33  }
34  const auto next_start =
35  inside_string_literal(what.position(), what.length(), lit_pos);
36  if (next_start >= 0) {
37  start_it = result.cbegin() + next_start;
38  } else {
39  shim_fn(result, what);
40  lit_pos = find_string_literals(result);
41  start_it = result.cbegin();
42  end_it = result.cend();
43  }
44  }
45 }
ssize_t inside_string_literal(const size_t start, const size_t length, const std::vector< std::pair< size_t, size_t >> &literal_positions)
std::vector< std::pair< size_t, size_t > > find_string_literals(const std::string &query)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::vector<std::pair<size_t, size_t> > find_string_literals ( const std::string &  query)

Definition at line 47 of file StringTransform.cpp.

References CHECK_GT.

Referenced by apply_shim().

47  {
48  boost::regex literal_string_regex{R"(([^']+)('(?:[^']+|'')+'))", boost::regex::perl};
49  boost::smatch what;
50  auto it = query.begin();
51  auto prev_it = it;
52  std::vector<std::pair<size_t, size_t>> positions;
53  while (true) {
54  if (!boost::regex_search(it, query.end(), what, literal_string_regex)) {
55  break;
56  }
57  CHECK_GT(what[1].length(), 0);
58  prev_it = it;
59  it += what.length();
60  positions.emplace_back(prev_it + what[1].length() - query.begin(),
61  it - query.begin());
62  }
63  return positions;
64 }
#define CHECK_GT(x, y)
Definition: Logger.h:202

+ Here is the caller graph for this function:

std::string generate_random_string ( const size_t  len)

Definition at line 102 of file StringTransform.cpp.

Referenced by MapDHandler::connect_impl(), MapDHandler::createInMemoryCalciteSession(), Catalog_Namespace::SysCatalog::syncUserWithRemoteProvider(), and Catalog_Namespace::SysCatalog::updateBlankPasswordsToRandom().

/**
 * Builds a string of len characters drawn uniformly from [0-9a-zA-Z].
 *
 * @param len Number of characters to generate; 0 yields an empty string.
 * @return The generated string.
 *
 * The generator is kept per thread so that concurrent callers never mutate a
 * shared engine.
 *
 * NOTE(review): std::mt19937 is not a cryptographically secure generator;
 * confirm that this is acceptable for the callers that use the result as a
 * password or session token.
 */
std::string generate_random_string(const size_t len) {
  static constexpr char charset[] =
      "0123456789"
      "abcdefghijklmnopqrstuvwxyz"
      "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
  // sizeof includes the terminating NUL, which must never be selected.
  constexpr size_t charset_size = sizeof(charset) - 1;

  // One engine per thread: std::mt19937 is not safe for concurrent use.
  thread_local std::mt19937 prng{std::random_device{}()};
  std::uniform_int_distribution<size_t> dist(0, charset_size - 1);

  std::string str;
  str.reserve(len);
  for (size_t i = 0; i < len; ++i) {
    str += charset[dist(prng)];
  }
  return str;
}

+ Here is the caller graph for this function:

std::string hide_sensitive_data_from_query ( std::string const &  query_str)

Definition at line 66 of file StringTransform.cpp.

Referenced by query_state::StdLog::log(), and MapDHandler::parse_to_plan_legacy().

/**
 * Returns a copy of query_str with sensitive values masked.
 *
 * Two rewrites are applied, both case-insensitively:
 *  - the quoted value following `password =`, `s3_access_key =` or
 *    `s3_secret_key =` becomes 'XXXXXXXX';
 *  - the argument following `\set_license` becomes XXXXXXXX.
 *
 * @param query_str Query text to sanitize; it is not modified.
 * @return The sanitized text.
 */
std::string hide_sensitive_data_from_query(std::string const& query_str) {
  constexpr std::regex::flag_type flags =
      std::regex::ECMAScript | std::regex::icase | std::regex::optimize;
  // Compiled once on first use and reused by every later call.
  static const std::pair<std::regex, std::string> rules[] = {
      {std::regex(R"(\b((?:password|s3_access_key|s3_secret_key)\s*=\s*)'.+?')", flags),
       "$1'XXXXXXXX'"},
      {std::regex(R"((\\set_license\s+)\S+)", flags), "$1XXXXXXXX"}};
  // The accumulator is taken by const reference: since C++20 std::accumulate
  // passes it as an rvalue, which a non-const lvalue reference cannot bind to.
  return std::accumulate(
      std::begin(rules),
      std::end(rules),
      query_str,
      [](const std::string& str, const std::pair<std::regex, std::string>& rule) {
        return std::regex_replace(str, rule.first, rule.second);
      });
}

+ Here is the caller graph for this function:

ssize_t inside_string_literal ( const size_t  start,
const size_t  length,
const std::vector< std::pair< size_t, size_t >> &  literal_positions 
)

Definition at line 79 of file StringTransform.cpp.

Referenced by apply_shim().

/**
 * Checks whether the span [start, start + length) is fully contained in one of
 * the given literal ranges.
 *
 * @param start             Offset of the first character of the span.
 * @param length            Number of characters in the span.
 * @param literal_positions (begin, end) offset pairs; a span is contained when
 *                          begin <= start and start + length <= end.
 * @return The end offset of the first containing range, or -1 if no range
 *         contains the span.
 */
ssize_t inside_string_literal(
    const size_t start,
    const size_t length,
    const std::vector<std::pair<size_t, size_t>>& literal_positions) {
  const size_t span_end = start + length;
  for (auto range = literal_positions.begin(); range != literal_positions.end(); ++range) {
    const bool begins_too_early = start < range->first;
    const bool ends_too_late = range->second < span_end;
    if (begins_too_early || ends_too_late) {
      continue;
    }
    return range->second;
  }
  return -1;
}

+ Here is the caller graph for this function:

bool remove_unquoted_newlines_linefeeds_and_tabs_from_sql_string ( std::string &  str)
noexcept

sanitize an SQL string

Definition at line 141 of file StringTransform.cpp.

/**
 * Replaces every newline, carriage return and tab that is outside of a quoted
 * region with a single space, modifying str in place.
 *
 * A quoted region is opened by ' or " and closed by the same character. A
 * quote that directly follows an unescaped backslash neither opens nor closes
 * a region, and a quote of the other kind inside a region is left alone.
 *
 * @param str SQL text to sanitize in place.
 * @return true if no quoted region is still open at the end of str, false
 *         otherwise (unterminated or mixed-nested quotes).
 */
bool remove_unquoted_newlines_linefeeds_and_tabs_from_sql_string(std::string& str) noexcept {
  char open_quote = 0;   // quote character of the current region, 0 when outside
  bool escaped = false;  // true when the previous character was an active backslash
  for (auto pos = str.begin(); pos != str.end(); ++pos) {
    char& ch = *pos;
    const bool is_quote = (ch == '\'' || ch == '\"');
    if (is_quote) {
      // an escaped quote never changes the region state
      if (!escaped) {
        if (open_quote == 0) {
          open_quote = ch;  // region starts
        } else if (open_quote == ch) {
          open_quote = 0;  // region ends
        }
        // a quote of the other type inside a region is ordinary text
      }
    } else if (open_quote == 0 && (ch == '\n' || ch == '\t' || ch == '\r')) {
      ch = ' ';
    }
    // a backslash escapes the next character unless it was itself escaped
    escaped = (ch == '\\') && !escaped;
  }
  return open_quote == 0;
}
std::vector<std::string> split ( const std::string &  str,
const std::string &  delim 
)

split apart a string into a vector of substrings

Definition at line 119 of file StringTransform.cpp.

References CHECK(), and run_benchmark_import::result.

Referenced by Catalog_Namespace::Catalog::adjustAlteredTableFiles(), create_table.SyntheticTable::createDataAndImportTable(), com.omnisci.jdbc.OmniSciStatement::executeQuery(), anonymous_namespace{MapDHandler.cpp}::extract_projection_tokens_for_completion(), get_qualified_column_hints(), run_benchmark::read_query_files(), Catalog_Namespace::Catalog::restoreTable(), QueryRunner::QueryRunner::runMultipleStatements(), and Parser::splitObjectHierName().

119  {
120  CHECK(!delim.empty());
121  std::vector<std::string> result;
122  std::string::size_type i = 0, j = 0;
123  while ((i = str.find(delim, i)) != std::string::npos) {
124  result.emplace_back(str, j, i - j);
125  i += delim.size();
126  j = i;
127  }
128  result.emplace_back(str, j);
129  return result;
130 }
CHECK(cgen_state)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::string strip ( const std::string &  str)

trim any whitespace from the left and right ends of a string

Definition at line 132 of file StringTransform.cpp.

Referenced by QueryRunner::QueryRunner::runMultipleStatements().

/**
 * Returns a copy of str without leading and trailing whitespace, where
 * whitespace is whatever std::isspace reports for the current C locale.
 *
 * @param str Text to trim; it is not modified.
 * @return The trimmed copy; empty if str is empty or consists only of
 *         whitespace.
 */
std::string strip(const std::string& str) {
  // std::isspace has undefined behavior for negative arguments other than EOF,
  // so bytes >= 0x80 must be widened through unsigned char first.
  const auto is_space = [](const char c) {
    return std::isspace(static_cast<unsigned char>(c)) != 0;
  };
  std::string::size_type i = 0;
  while (i < str.size() && is_space(str[i])) {
    ++i;
  }
  std::string::size_type j = str.size();
  while (j > i && is_space(str[j - 1])) {
    --j;
  }
  return str.substr(i, j - i);
}

+ Here is the caller graph for this function:

template<>
std::string to_string ( char const *&&  v)

Definition at line 93 of file StringTransform.cpp.

Referenced by Importer_NS::TypedImportBuffer::add_value(), Catalog_Namespace::Catalog::addColumn(), Catalog_Namespace::Catalog::addFrontendViewToMapNoLock(), Catalog_Namespace::Catalog::addLinkToMap(), Catalog_Namespace::Catalog::addReferenceToForeignDict(), CgenState::addStringConstant(), Catalog_Namespace::Catalog::adjustAlteredTableFiles(), anonymous_namespace{TargetExprBuilder.cpp}::agg_fn_base_names(), Catalog_Namespace::SysCatalog::alterUser(), FixedLengthEncoder< T, V >::appendData(), query_state::StdLog::appendNameValuePairs(), Archive::archive_error(), Catalog_Namespace::Catalog::buildMaps(), Catalog_Namespace::SysCatalog::buildObjectDescriptorMap(), ResultSetReductionJIT::cacheKey(), Catalog_Namespace::Catalog::checkDateInDaysColumnMigration(), anonymous_namespace{Execute.cpp}::checkWorkUnitWatchdog(), TargetExprCodegen::codegen(), GroupByAndAggregate::codegenAggColumnPtr(), CodeGenerator::codegenArrayAt(), CodeGenerator::codegenHoistedConstantsLoads(), CodeGenerator::codegenHoistedConstantsPlaceholders(), BaselineJoinHashTable::codegenMatchingSet(), GroupByAndAggregate::codegenOutputSlot(), BaselineJoinHashTable::codegenSlot(), CodeGenerator::codgenAdjustFixedEncNull(), CodeGenerator::colByteStream(), BloscCompressor::compress(), Importer_NS::compress_coords(), OverlapsJoinHashTable::computeBucketSizes(), MapDHandler::convert_rows(), MapDHandler::convert_target_metainfo(), File_Namespace::create(), MapDHandler::create_dashboard(), create_dev_group_by_buffers(), MapDHandler::create_table(), Catalog_Namespace::Catalog::createDashboard(), Catalog_Namespace::Catalog::createDashboardSystemRoles(), Catalog_Namespace::SysCatalog::createDatabase(), Catalog_Namespace::Catalog::createLink(), Catalog_Namespace::Catalog::createTable(), RelAlgExecutor::createTableFunctionWorkUnit(), Catalog_Namespace::SysCatalog::createUser(), anonymous_namespace{ArrowImporter.h}::data_conversion_error(), datum_to_string(), DatumToString(), 
ArrowResultSet::deallocateArrowResultBuffer(), decodeJoinHashBufferToString(), anonymous_namespace{JoinHashTableInterface.cpp}::decodeJoinHashBufferToStringFlat(), BloscCompressor::decompress(), Catalog_Namespace::Catalog::delDictionary(), MapDHandler::delete_dashboard(), Catalog_Namespace::Catalog::deleteMetadataForDashboard(), Catalog_Namespace::SysCatalog::deleteObjectDescriptorMap(), Catalog_Namespace::deleteObjectPrivileges(), DecimalOverflowValidator::do_validate(), Catalog_Namespace::Catalog::doDropTable(), Catalog_Namespace::SysCatalog::dropDatabase(), Catalog_Namespace::Catalog::dropTable(), Catalog_Namespace::SysCatalog::dropUser(), Catalog_Namespace::Catalog::dumpSchema(), Catalog_Namespace::Catalog::dumpTable(), anonymous_namespace{ResultSetReductionJIT.cpp}::emit_write_projection(), anonymous_namespace{ArrowImporter.h}::error_context(), Parser::CopyTableStmt::execute(), Parser::ExportQueryStmt::execute(), Executor::executeSimpleInsert(), File_Namespace::FileMgr::FileMgr(), ProxyTHttpClient::flush(), Importer_NS::GDALErrorHandler(), anonymous_namespace{JoinLoopTest.cpp}::generate_descriptors(), Catalog_Namespace::Catalog::generatePhysicalTableName(), Geo_namespace::GeoPoint::GeoPoint(), MapDHandler::get_dashboard(), MapDHandler::get_dashboard_grantees(), DateTimeUtils::get_dateadd_high_precision_adjusted_scale(), DateTimeUtils::get_dateadd_timestamp_precision_scale(), MapDHandler::get_db_object_privs(), DateTimeUtils::get_extract_timestamp_precision_scale(), Importer_NS::Detector::get_headers(), MapDHandler::get_link_view(), TimeGM::get_overflow_underflow_safe_epoch(), ThriftClientConnection::get_protocol(), HitTestTypes::get_rowid_regex(), anonymous_namespace{Execute.cpp}::get_table_name(), DateTimeUtils::get_timestamp_precision_scale(), SQLTypeInfoCore< ArrayContextTypeSizer, ExecutorTypePackaging, DateTimeFacilities >::get_type_name(), MapDHandler::get_valid_groups(), ArrowResultSetConverter::getArrowType(), 
Fragmenter_Namespace::InsertOrderFragmenter::getChunkMetadata(), Parser::InsertIntoTableAsSelectStmt::LocalConnector::getColumnDescriptors(), getCurrentStackTrace(), CudaMgr_Namespace::CudaMgr::getDeviceProperties(), RelAlgExecutor::getErrorMessageFromCode(), Catalog_Namespace::SysCatalog::getGranteesOfSharedDashboards(), Catalog_Namespace::Catalog::getMetadataForDashboard(), Catalog_Namespace::SysCatalog::getMetadataForDBById(), Catalog_Namespace::SysCatalog::getMetadataForObject(), Catalog_Namespace::SysCatalog::getMetadataForUserById(), Catalog_Namespace::SysCatalog::getMetadataWithDefaultDB(), Catalog_Namespace::SysCatalog::getRoles(), MapDHandler::has_object_privilege(), MapDHandler::import_geo_table(), Importer_NS::import_thread_delimited(), Importer_NS::import_thread_shapefile(), File_Namespace::FileMgr::init(), anonymous_namespace{JoinHashTableInterface.cpp}::innerDecodeJoinHashBufferToString(), Catalog_Namespace::insertOrUpdateObjectPrivileges(), Executor::interrupt(), ResultSet::isGeoColOnGpu(), TableFunctionExecutionContext::launchCpuCode(), QueryExecutionContext::launchGpuCode(), MapDHandler::load_table(), DBObject::loadKey(), anonymous_namespace{ExtensionFunctionsBinding.cpp}::match_arguments(), Catalog_Namespace::SysCatalog::migrateDBAccessPrivileges(), numeric_type_name(), Importer_NS::ogr_to_type(), Geo_namespace::GeoTypesError::OGRErrorToStr(), File_Namespace::open(), anonymous_namespace{ArrowImporter.h}::ArrowValue< float >::operator const std::string(), anonymous_namespace{ArrowImporter.h}::ArrowValue< double >::operator const std::string(), anonymous_namespace{ArrowImporter.h}::ArrowValue< int64_t >::operator const std::string(), Executor::optimizeAndCodegenGPU(), OutOfMemory::parse_error_str(), parse_numeric(), Importer_NS::DelimitedParserUtils::parseStringArray(), anonymous_namespace{geo_types.cpp}::process_poly_ring(), Catalog_Namespace::Catalog::recordOwnershipOfObjectsInObjectPermissions(), ResultSetReductionJIT::reduceOneEntryBaseline(), 
ResultSetReductionJIT::reduceOneEntryTargetsNoCollisions(), QueryMemoryDescriptor::reductionKey(), Executor::registerActiveModule(), BaselineJoinHashTable::reifyForDevice(), Catalog_Namespace::Catalog::renameColumn(), Catalog_Namespace::SysCatalog::renameDatabase(), File_Namespace::renameForDelete(), Catalog_Namespace::SysCatalog::renameObjectsInDescriptorMap(), Catalog_Namespace::Catalog::renamePhysicalTable(), Catalog_Namespace::Catalog::replaceDashboard(), Catalog_Namespace::Catalog::restoreTable(), ArrowResultSet::resultSetArrowLoopback(), Catalog_Namespace::SysCatalog::revokeAllOnDatabase_unsafe(), Catalog_Namespace::run(), Executor::ExecutionDispatch::runImpl(), Importer_NS::RenderGroupAnalyzer::seedFromExistingTableContents(), Catalog_Namespace::Catalog::setColumnDictionary(), Catalog_Namespace::Catalog::setColumnSharedDictionary(), start_calcite_server_as_daemon(), query_state::StdLogData::StdLogData(), anonymous_namespace{ResultSetReductionJIT.cpp}::target_info_key(), MapDHandler::thrift_to_copyparams(), Parser::InSubquery::to_string(), Parser::InValues::to_string(), RexAbstractInput::toString(), ColSlotContext::toString(), DBObject::toString(), Analyzer::ColumnVar::toString(), RexOperator::toString(), Analyzer::Var::toString(), RexSubQuery::toString(), QueryMemoryDescriptor::toString(), RexInput::toString(), Analyzer::UOper::toString(), SortField::toString(), RexRef::toString(), RexAgg::toString(), Analyzer::InIntegerSet::toString(), RelScan::toString(), RelProject::toString(), RelAggregate::toString(), RelJoin::toString(), RelFilter::toString(), Analyzer::LikelihoodExpr::toString(), RelLeftDeepInnerJoin::toString(), RelCompound::toString(), Analyzer::ExtractExpr::toString(), RelSort::toString(), Analyzer::DateaddExpr::toString(), Analyzer::DatediffExpr::toString(), RelModify::toString(), Analyzer::DatetruncExpr::toString(), Analyzer::OrderEntry::toString(), RelTableFunction::toString(), RelLogicalValues::toString(), 
RelAlgTranslator::translateGeoFunctionArg(), RelAlgTranslator::translateHPTLiteral(), Executor::unregisterActiveModule(), Catalog_Namespace::SysCatalog::updateBlankPasswordsToRandom(), Fragmenter_Namespace::InsertOrderFragmenter::updateColumn(), Catalog_Namespace::Catalog::updateDeletedColumnIndicator(), Catalog_Namespace::Catalog::updateDictionaryNames(), Catalog_Namespace::Catalog::updateFixlenArrayColumns(), Catalog_Namespace::Catalog::updateLogicalToPhysicalTableMap(), Catalog_Namespace::SysCatalog::updateObjectDescriptorMap(), Catalog_Namespace::Catalog::updatePageSize(), Catalog_Namespace::SysCatalog::updateSupportUserDeactivation(), Catalog_Namespace::Catalog::updateTableDescriptorSchema(), DateDaysOverflowValidator::validate(), and MapDProgramOptions::validate().

93  {
94  return std::string(v);
95 }
template<>
std::string to_string ( std::string &&  v)

Definition at line 98 of file StringTransform.cpp.

98  {
99  return std::move(v);
100 }