130 lines
		
	
	
		
			4.3 KiB
		
	
	
	
		
			Plaintext
		
	
	
	
	
	
		
		
			
		
	
	
			130 lines
		
	
	
		
			4.3 KiB
		
	
	
	
		
			Plaintext
		
	
	
	
	
	
|   | //---------------------------------------------------------------------------// | ||
|  | // Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> | ||
|  | // | ||
|  | // Distributed under the Boost Software License, Version 1.0 | ||
|  | // See accompanying file LICENSE_1_0.txt or copy at | ||
|  | // http://www.boost.org/LICENSE_1_0.txt | ||
|  | // | ||
|  | // See http://boostorg.github.com/compute for more information. | ||
|  | //---------------------------------------------------------------------------// | ||
|  | 
 | ||
|  | #ifndef BOOST_COMPUTE_ALGORITHM_DETAIL_COUNT_IF_WITH_THREADS_HPP | ||
|  | #define BOOST_COMPUTE_ALGORITHM_DETAIL_COUNT_IF_WITH_THREADS_HPP | ||
|  | 
 | ||
|  | #include <numeric> | ||
|  | 
 | ||
|  | #include <boost/compute/detail/meta_kernel.hpp> | ||
|  | #include <boost/compute/container/vector.hpp> | ||
|  | 
 | ||
|  | namespace boost { | ||
|  | namespace compute { | ||
|  | namespace detail { | ||
|  | 
 | ||
|  | template<class InputIterator, class Predicate> | ||
|  | class count_if_with_threads_kernel : meta_kernel | ||
|  | { | ||
|  | public: | ||
|  |     typedef typename | ||
|  |         std::iterator_traits<InputIterator>::value_type | ||
|  |         value_type; | ||
|  | 
 | ||
|  |     count_if_with_threads_kernel() | ||
|  |         : meta_kernel("count_if_with_threads") | ||
|  |     { | ||
|  |     } | ||
|  | 
 | ||
|  |     void set_args(InputIterator first, | ||
|  |                   InputIterator last, | ||
|  |                   Predicate predicate) | ||
|  | 
 | ||
|  |     { | ||
|  |         typedef typename std::iterator_traits<InputIterator>::value_type T; | ||
|  | 
 | ||
|  |         m_size = detail::iterator_range_size(first, last); | ||
|  | 
 | ||
|  |         m_size_arg = add_arg<const ulong_>("size"); | ||
|  |         m_counts_arg = add_arg<ulong_ *>(memory_object::global_memory, "counts"); | ||
|  | 
 | ||
|  |         *this << | ||
|  |             // thread parameters | ||
|  |             "const uint gid = get_global_id(0);\n" << | ||
|  |             "const uint block_size = size / get_global_size(0);\n" << | ||
|  |             "const uint start = block_size * gid;\n" << | ||
|  |             "uint end = 0;\n" << | ||
|  |             "if(gid == get_global_size(0) - 1)\n" << | ||
|  |             "    end = size;\n" << | ||
|  |             "else\n" << | ||
|  |             "    end = block_size * gid + block_size;\n" << | ||
|  | 
 | ||
|  |             // count values | ||
|  |             "uint count = 0;\n" << | ||
|  |             "for(uint i = start; i < end; i++){\n" << | ||
|  |                 decl<const T>("value") << "=" | ||
|  |                     << first[expr<uint_>("i")] << ";\n" << | ||
|  |                 if_(predicate(var<const T>("value"))) << "{\n" << | ||
|  |                     "count++;\n" << | ||
|  |                 "}\n" << | ||
|  |             "}\n" << | ||
|  | 
 | ||
|  |             // write count | ||
|  |             "counts[gid] = count;\n"; | ||
|  |     } | ||
|  | 
 | ||
|  |     size_t exec(command_queue &queue) | ||
|  |     { | ||
|  |         const device &device = queue.get_device(); | ||
|  |         const context &context = queue.get_context(); | ||
|  | 
 | ||
|  |         size_t threads = device.compute_units(); | ||
|  | 
 | ||
|  |         const size_t minimum_block_size = 2048; | ||
|  |         if(m_size / threads < minimum_block_size){ | ||
|  |             threads = static_cast<size_t>( | ||
|  |                           (std::max)( | ||
|  |                               std::ceil(float(m_size) / minimum_block_size), | ||
|  |                               1.0f | ||
|  |                           ) | ||
|  |                       ); | ||
|  |         } | ||
|  | 
 | ||
|  |         // storage for counts | ||
|  |         ::boost::compute::vector<ulong_> counts(threads, context); | ||
|  | 
 | ||
|  |         // exec kernel | ||
|  |         set_arg(m_size_arg, static_cast<ulong_>(m_size)); | ||
|  |         set_arg(m_counts_arg, counts.get_buffer()); | ||
|  |         exec_1d(queue, 0, threads, 1); | ||
|  | 
 | ||
|  |         // copy counts to the host | ||
|  |         std::vector<ulong_> host_counts(threads); | ||
|  |         ::boost::compute::copy(counts.begin(), counts.end(), host_counts.begin(), queue); | ||
|  | 
 | ||
|  |         // return sum of counts | ||
|  |         return std::accumulate(host_counts.begin(), host_counts.end(), size_t(0)); | ||
|  |     } | ||
|  | 
 | ||
|  | private: | ||
|  |     size_t m_size; | ||
|  |     size_t m_size_arg; | ||
|  |     size_t m_counts_arg; | ||
|  | }; | ||
|  | 
 | ||
|  | // counts values that match the predicate using one thread per block. this is | ||
|  | // optimized for cpu-type devices with a small number of compute units. | ||
|  | template<class InputIterator, class Predicate> | ||
|  | inline size_t count_if_with_threads(InputIterator first, | ||
|  |                                     InputIterator last, | ||
|  |                                     Predicate predicate, | ||
|  |                                     command_queue &queue) | ||
|  | { | ||
|  |     count_if_with_threads_kernel<InputIterator, Predicate> kernel; | ||
|  |     kernel.set_args(first, last, predicate); | ||
|  |     return kernel.exec(queue); | ||
|  | } | ||
|  | 
 | ||
|  | } // end detail namespace | ||
|  | } // end compute namespace | ||
|  | } // end boost namespace | ||
|  | 
 | ||
|  | #endif // BOOST_COMPUTE_ALGORITHM_DETAIL_COUNT_IF_WITH_THREADS_HPP |