130 lines
4.3 KiB
Plaintext
130 lines
4.3 KiB
Plaintext
//---------------------------------------------------------------------------//
|
|
// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
|
|
//
|
|
// Distributed under the Boost Software License, Version 1.0
|
|
// See accompanying file LICENSE_1_0.txt or copy at
|
|
// http://www.boost.org/LICENSE_1_0.txt
|
|
//
|
|
// See http://boostorg.github.com/compute for more information.
|
|
//---------------------------------------------------------------------------//
|
|
|
|
#ifndef BOOST_COMPUTE_ALGORITHM_DETAIL_COUNT_IF_WITH_THREADS_HPP
|
|
#define BOOST_COMPUTE_ALGORITHM_DETAIL_COUNT_IF_WITH_THREADS_HPP
|
|
|
|
#include <numeric>
|
|
|
|
#include <boost/compute/detail/meta_kernel.hpp>
|
|
#include <boost/compute/container/vector.hpp>
|
|
|
|
namespace boost {
|
|
namespace compute {
|
|
namespace detail {
|
|
|
|
template<class InputIterator, class Predicate>
|
|
class count_if_with_threads_kernel : meta_kernel
|
|
{
|
|
public:
|
|
typedef typename
|
|
std::iterator_traits<InputIterator>::value_type
|
|
value_type;
|
|
|
|
count_if_with_threads_kernel()
|
|
: meta_kernel("count_if_with_threads")
|
|
{
|
|
}
|
|
|
|
void set_args(InputIterator first,
|
|
InputIterator last,
|
|
Predicate predicate)
|
|
|
|
{
|
|
typedef typename std::iterator_traits<InputIterator>::value_type T;
|
|
|
|
m_size = detail::iterator_range_size(first, last);
|
|
|
|
m_size_arg = add_arg<const ulong_>("size");
|
|
m_counts_arg = add_arg<ulong_ *>(memory_object::global_memory, "counts");
|
|
|
|
*this <<
|
|
// thread parameters
|
|
"const uint gid = get_global_id(0);\n" <<
|
|
"const uint block_size = size / get_global_size(0);\n" <<
|
|
"const uint start = block_size * gid;\n" <<
|
|
"uint end = 0;\n" <<
|
|
"if(gid == get_global_size(0) - 1)\n" <<
|
|
" end = size;\n" <<
|
|
"else\n" <<
|
|
" end = block_size * gid + block_size;\n" <<
|
|
|
|
// count values
|
|
"uint count = 0;\n" <<
|
|
"for(uint i = start; i < end; i++){\n" <<
|
|
decl<const T>("value") << "="
|
|
<< first[expr<uint_>("i")] << ";\n" <<
|
|
if_(predicate(var<const T>("value"))) << "{\n" <<
|
|
"count++;\n" <<
|
|
"}\n" <<
|
|
"}\n" <<
|
|
|
|
// write count
|
|
"counts[gid] = count;\n";
|
|
}
|
|
|
|
size_t exec(command_queue &queue)
|
|
{
|
|
const device &device = queue.get_device();
|
|
const context &context = queue.get_context();
|
|
|
|
size_t threads = device.compute_units();
|
|
|
|
const size_t minimum_block_size = 2048;
|
|
if(m_size / threads < minimum_block_size){
|
|
threads = static_cast<size_t>(
|
|
(std::max)(
|
|
std::ceil(float(m_size) / minimum_block_size),
|
|
1.0f
|
|
)
|
|
);
|
|
}
|
|
|
|
// storage for counts
|
|
::boost::compute::vector<ulong_> counts(threads, context);
|
|
|
|
// exec kernel
|
|
set_arg(m_size_arg, static_cast<ulong_>(m_size));
|
|
set_arg(m_counts_arg, counts.get_buffer());
|
|
exec_1d(queue, 0, threads, 1);
|
|
|
|
// copy counts to the host
|
|
std::vector<ulong_> host_counts(threads);
|
|
::boost::compute::copy(counts.begin(), counts.end(), host_counts.begin(), queue);
|
|
|
|
// return sum of counts
|
|
return std::accumulate(host_counts.begin(), host_counts.end(), size_t(0));
|
|
}
|
|
|
|
private:
|
|
size_t m_size;
|
|
size_t m_size_arg;
|
|
size_t m_counts_arg;
|
|
};
|
|
|
|
// counts values that match the predicate using one thread per block. this is
|
|
// optimized for cpu-type devices with a small number of compute units.
|
|
template<class InputIterator, class Predicate>
|
|
inline size_t count_if_with_threads(InputIterator first,
|
|
InputIterator last,
|
|
Predicate predicate,
|
|
command_queue &queue)
|
|
{
|
|
count_if_with_threads_kernel<InputIterator, Predicate> kernel;
|
|
kernel.set_args(first, last, predicate);
|
|
return kernel.exec(queue);
|
|
}
|
|
|
|
} // end detail namespace
|
|
} // end compute namespace
|
|
} // end boost namespace
|
|
|
|
#endif // BOOST_COMPUTE_ALGORITHM_DETAIL_COUNT_IF_WITH_THREADS_HPP
|