79 lines
		
	
	
		
			2.6 KiB
		
	
	
	
		
			Plaintext
		
	
	
	
	
	
			
		
		
	
	
			79 lines
		
	
	
		
			2.6 KiB
		
	
	
	
		
			Plaintext
		
	
	
	
	
	
| //---------------------------------------------------------------------------//
 | |
| // Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
 | |
| //
 | |
| // Distributed under the Boost Software License, Version 1.0
 | |
| // See accompanying file LICENSE_1_0.txt or copy at
 | |
| // http://www.boost.org/LICENSE_1_0.txt
 | |
| //
 | |
| // See http://boostorg.github.com/compute for more information.
 | |
| //---------------------------------------------------------------------------//
 | |
| 
 | |
| #ifndef BOOST_COMPUTE_ALGORITHM_DETAIL_COUNT_IF_WITH_BALLOT_HPP
 | |
| #define BOOST_COMPUTE_ALGORITHM_DETAIL_COUNT_IF_WITH_BALLOT_HPP
 | |
| 
 | |
| #include <boost/compute/context.hpp>
 | |
| #include <boost/compute/command_queue.hpp>
 | |
| #include <boost/compute/container/vector.hpp>
 | |
| #include <boost/compute/algorithm/reduce.hpp>
 | |
| #include <boost/compute/functional/detail/nvidia_ballot.hpp>
 | |
| #include <boost/compute/functional/detail/nvidia_popcount.hpp>
 | |
| #include <boost/compute/detail/meta_kernel.hpp>
 | |
| 
 | |
| namespace boost {
 | |
| namespace compute {
 | |
| namespace detail {
 | |
| 
 | |
| template<class InputIterator, class Predicate>
 | |
| inline size_t count_if_with_ballot(InputIterator first,
 | |
|                                    InputIterator last,
 | |
|                                    Predicate predicate,
 | |
|                                    command_queue &queue)
 | |
| {
 | |
|     size_t count = iterator_range_size(first, last);
 | |
|     size_t block_size = 32;
 | |
|     size_t block_count = count / block_size;
 | |
|     if(block_count * block_size != count){
 | |
|         block_count++;
 | |
|     }
 | |
| 
 | |
|     const ::boost::compute::context &context = queue.get_context();
 | |
| 
 | |
|     ::boost::compute::vector<uint_> counts(block_count, context);
 | |
| 
 | |
|     ::boost::compute::detail::nvidia_popcount<uint_> popc;
 | |
|     ::boost::compute::detail::nvidia_ballot<uint_> ballot;
 | |
| 
 | |
|     meta_kernel k("count_if_with_ballot");
 | |
|     k <<
 | |
|         "const uint gid = get_global_id(0);\n" <<
 | |
| 
 | |
|         "bool value = false;\n" <<
 | |
|         "if(gid < count)\n" <<
 | |
|         "    value = " << predicate(first[k.var<const uint_>("gid")]) << ";\n" <<
 | |
| 
 | |
|         "uint bits = " << ballot(k.var<const uint_>("value")) << ";\n" <<
 | |
| 
 | |
|         "if(get_local_id(0) == 0)\n" <<
 | |
|             counts.begin()[k.var<uint_>("get_group_id(0)") ]
 | |
|                 << " = " << popc(k.var<uint_>("bits")) << ";\n";
 | |
| 
 | |
|     k.add_set_arg<const uint_>("count", count);
 | |
| 
 | |
|     k.exec_1d(queue, 0, block_size * block_count, block_size);
 | |
| 
 | |
|     uint_ result;
 | |
|     ::boost::compute::reduce(
 | |
|         counts.begin(),
 | |
|         counts.end(),
 | |
|         &result,
 | |
|         queue
 | |
|     );
 | |
|     return result;
 | |
| }
 | |
| 
 | |
| } // end detail namespace
 | |
| } // end compute namespace
 | |
| } // end boost namespace
 | |
| 
 | |
| #endif // BOOST_COMPUTE_ALGORITHM_DETAIL_COUNT_IF_WITH_BALLOT_HPP
 | 
