threading__std_8h_source.html

 #include <algorithm>
 #include <cassert>
 #include <cstddef>
 #include <future>
 #include <type_traits>
 #include <utility>
 #include <vector>

 #include "thread_count.h"


 #ifndef THREADING_STD_LAUNCH

 #define THREADING_STD_LAUNCH async  // async or deferred

 #endif


 namespace threading_common {

 // Tag type: passing an instance selects blocked_range's splitting constructor.
 class split {};

 // Partitioner tag. The parallel_for / parallel_reduce templates in this file
 // accept it as their default Partitioner argument but do not consult it.
 class auto_partitioner {};
 // class static_partitioner;
 // class affinity_partitioner;

 // A half-open interval [begin, end) over values of type Value.
 //
 // Value must support operator<, subtraction yielding something convertible to
 // std::size_t, addition of an offset, and division of a difference by an
 // integer. The grain size is not configurable yet and is fixed at 1.
 template <typename Value>
 class blocked_range {
  public:
   // Type of the values stored as the bounds of the range.
   using const_iterator = Value;

   // Type used to report the number of elements in the range.
   using size_type = std::size_t;

   // Constructs the range [begin_, end_).
   blocked_range(Value begin_, Value end_ /*TODO , size_type grainsize_=1*/)
       : my_end(end_)
       , my_begin(begin_)  //, my_grainsize(grainsize_)
   {
     // assert( my_grainsize>0 && "grainsize must be positive" );
   }

   // First value of the range.
   const_iterator begin() const { return my_begin; }

   // One past the last value of the range.
   const_iterator end() const { return my_end; }

   // Number of values in the range. Asserts that end() is not before begin().
   size_type size() const {
     assert(!(end() < begin()) && "size() unspecified if end()<begin()");
     return size_type(my_end - my_begin);
   }

   // Smallest chunk the range may be split into; currently always 1.
   size_type grainsize() const { return 1 /*my_grainsize*/; }

   //------------------------------------------------------------------------
   // Methods that implement Range concept
   //------------------------------------------------------------------------

   // True when the range holds no values (begin is not less than end).
   bool empty() const { return !(my_begin < my_end); }

   // True when the range holds more values than one grain and can therefore be
   // split into two non-empty halves. A range of exactly one grain must not be
   // reported as divisible: splitting it would leave an empty range behind and
   // hand the whole interval to the new range, so repeated splitting would
   // never make progress.
   bool is_divisible() const { return grainsize() < size(); }

   // Splitting constructor: shrinks r to its lower half and makes *this the
   // upper half, so that afterwards r.end() equals this->begin().
   blocked_range(blocked_range& r, split)
       : my_end(r.my_end)
       , my_begin(do_split(r, split()))
   // TODO , my_grainsize(r.my_grainsize)
   {
     // only comparison 'less than' is required from values of blocked_range objects
     assert(!(my_begin < r.my_end) && !(r.my_end < my_begin) &&
            "blocked_range has been split incorrectly");
   }

  private:
   // NOTE: my_end is declared before my_begin on purpose. Members are
   // initialized in declaration order, and the splitting constructor must copy
   // r.my_end before do_split() overwrites it.
   Value my_end;
   Value my_begin;
   // TODO size_type my_grainsize;

   // Helper for the splitting constructor: moves r's end to the midpoint of r
   // and returns that midpoint, which becomes the begin of the new range.
   static Value do_split(blocked_range& r, split) {
     assert(r.is_divisible() && "cannot split blocked_range that is not divisible");
     Value middle = r.my_begin + (r.my_end - r.my_begin) / 2u;
     r.my_end = middle;
     return middle;
   }
 };

 }  // namespace threading_common


 namespace threading_std {


 // Expose std::future unqualified inside threading_std.
 using std::future;

 // Make split, auto_partitioner and blocked_range visible inside threading_std.
 using namespace threading_common;

 // Launch policy handed to every std::async call in this namespace. It is
 // selected at compile time through the THREADING_STD_LAUNCH macro, which
 // defaults to `async` when the macro is not defined before this point.
 constexpr auto launch = std::launch::THREADING_STD_LAUNCH;


 // Starts fn(args...) through std::async using the namespace-wide `launch`
 // policy and hands back the future for its result. The callable and its
 // arguments are perfectly forwarded to std::async.
 template <typename Fn,
           typename... Args,
           typename Result = std::result_of_t<Fn && (Args && ...)>>
 future<Result> async(Fn&& fn, Args&&... args) {
   auto pending =
       std::async(launch, std::forward<Fn>(fn), std::forward<Args>(args)...);
   return pending;
 }


 class task_group {

   std::vector<future<void>> threads_;


  public:

   template <typename F>

   void run(F&& f) {

     threads_.emplace_back(async(std::forward<F>(f)));

   }

   void cancel() { /*not implemented*/

   }

   void wait() {  // TODO task_group_status ?

     for (auto& child : this->threads_) {

       child.wait();

     }

   }

 };  // class task_group


 // template<typename Range, typename Body, typename Partitioner = auto_partitioner>

 // void parallel_for( const Range& range, const Body& body, const Partitioner &p =

 // Partitioner());


 template <typename Int, typename Body, typename Partitioner = auto_partitioner>

 void parallel_for(const blocked_range<Int>& range,

                   const Body& body,

                   const Partitioner& p = Partitioner()) {

   const Int worker_count = cpu_threads();

   std::vector<std::future<void>> worker_threads;

   worker_threads.reserve(worker_count);


   for (Int i = 0,

            start_entry = range.begin(),

            stop_entry = range.end(),

            stride = (range.size() + worker_count - 1) / worker_count;

        i < worker_count && start_entry < stop_entry;

        ++i, start_entry += stride) {

     const auto end_entry = std::min(start_entry + stride, stop_entry);

     // TODO grainsize?

     worker_threads.emplace_back(

         std::async(launch, body, blocked_range<Int>(start_entry, end_entry)));

   }

   for (auto& child : worker_threads) {

     child.wait();

   }

 }


 // Index-based convenience overload: calls f(i) for every i in [first, last)
 // by delegating to the blocked_range overload of parallel_for. The
 // partitioner argument is passed through unchanged.
 template <typename Index, typename Function, typename Partitioner = auto_partitioner>
 void parallel_for(Index first,
                   Index last,
                   const Function& f,
                   const Partitioner& p = Partitioner()) {
   // Adapter that walks one sub-range and applies f to each index in it.
   // f is captured by reference; it is only used while this call is running.
   const auto apply_to_chunk = [&f](const blocked_range<Index>& chunk) {
     //#pragma ivdep
     //#pragma omp simd
     const auto chunk_end = chunk.end();
     for (auto index = chunk.begin(); index < chunk_end; ++index) {
       f(index);
     }
   };

   parallel_for(blocked_range<Index>(first, last), apply_to_chunk, p);
 }


 template <typename Int,

           typename Value,

           typename RealBody,

           typename Reduction,

           typename Partitioner = auto_partitioner>

 Value parallel_reduce(const blocked_range<Int>& range,

                       const Value& identity,

                       const RealBody& real_body,

                       const Reduction& reduction,

                       const Partitioner& p = Partitioner()) {

   const size_t worker_count = cpu_threads();

   std::vector<std::future<Value>> worker_threads;

   worker_threads.reserve(worker_count);


   for (Int i = 0,

            start_entry = range.begin(),

            stop_entry = range.end(),

            stride = (range.size() + worker_count - 1) / worker_count;

        i < worker_count && start_entry < stop_entry;

        ++i, start_entry += stride) {

     const auto end_entry = std::min(start_entry + stride, stop_entry);

     // TODO grainsize?

     worker_threads.emplace_back(std::async(

         launch, real_body, blocked_range<Int>(start_entry, end_entry), Value{}));

   }

   Value v = identity;

   for (auto& child : worker_threads) {

     v = reduction(v, child.get());

   }


   return v;

 }


 }  // namespace threading_std

threading_std::task_group::run
void run(F &&f)
Definition: threading_std.h:114

threading_common::blocked_range::size
size_type size() const
Size of the range.
Definition: threading_std.h:47

run_benchmark_import.args
tuple args
Definition: run_benchmark_import.py:247

threading_common::auto_partitioner
Definition: threading_std.h:15

thread_count.h

threading_std::task_group
Definition: threading_std.h:109

threading_std::parallel_for
void parallel_for(const blocked_range< Int > &range, const Body &body, const Partitioner &p=Partitioner())
Parallel iteration over range with default partitioner.
Definition: threading_std.h:133

threading_std::launch
constexpr auto launch
Definition: threading_std.h:100

threading_common::blocked_range::do_split
static Value do_split(blocked_range &r, split)
Auxiliary function used by the splitting constructor.
Definition: threading_std.h:87

threading_common::blocked_range::my_begin
Value my_begin
Definition: threading_std.h:83

threading_std::task_group::threads_
std::vector< future< void > > threads_
Definition: threading_std.h:110

threading_common::blocked_range::my_end
Value my_end
Definition: threading_std.h:82

threading_std::task_group::cancel
void cancel()
Definition: threading_std.h:117

threading_common::blocked_range::empty
bool empty() const
True if range is empty.
Definition: threading_std.h:60

threading_serial::async
future< Result > async(Fn &&fn, Args &&...args)
Definition: threading_serial.h:11

Value
Definition: ResultSetReductionOps.h:168

threading_common::blocked_range
A range over which to iterate.
Definition: threading_std.h:21

threading_std::async
future< Result > async(Fn &&fn, Args &&...args)
Definition: threading_std.h:105

threading_std::task_group::wait
void wait()
Definition: threading_std.h:119

threading_std::parallel_reduce
Value parallel_reduce(const blocked_range< Int > &range, const Value &identity, const RealBody &real_body, const Reduction &reduction, const Partitioner &p=Partitioner())
Parallel iteration with reduction.
Definition: threading_std.h:182

threading_common::blocked_range::grainsize
size_type grainsize() const
The grain size for this range.
Definition: threading_std.h:53

threading_common::blocked_range::blocked_range
blocked_range(Value begin_, Value end_)
Construct range over half-open interval [begin,end); the grainsize is not configurable yet and is fixed at 1.
Definition: threading_std.h:32

threading_common::blocked_range::blocked_range
blocked_range(blocked_range &r, split)
Split range.
Definition: threading_std.h:69

THREADING_STD_LAUNCH
#define THREADING_STD_LAUNCH
Definition: threading_std.h:9

f
torch::Tensor f(torch::Tensor x, torch::Tensor W_target, torch::Tensor b_target)
Definition: TestTorchTableFunctions.cpp:103

threading_common::blocked_range::end
const_iterator end() const
One past last value in range.
Definition: threading_std.h:43

threading_common::split
Definition: threading_std.h:13

threading_common::blocked_range::begin
const_iterator begin() const
Beginning of range.
Definition: threading_std.h:40

cpu_threads
int cpu_threads()
Definition: thread_count.h:25

threading_common::blocked_range::is_divisible
bool is_divisible() const
True if range is divisible.
Definition: threading_std.h:64

threading_common::blocked_range::size_type
std::size_t size_type
Type for size of a range.
Definition: threading_std.h:29