185 lines
		
	
	
		
			6.1 KiB
		
	
	
	
		
			Plaintext
		
	
	
	
	
	
		
		
			
		
	
	
			185 lines
		
	
	
		
			6.1 KiB
		
	
	
	
		
			Plaintext
		
	
	
	
	
	
|   | //---------------------------------------------------------------------------// | ||
|  | // Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> | ||
|  | // | ||
|  | // Distributed under the Boost Software License, Version 1.0 | ||
|  | // See accompanying file LICENSE_1_0.txt or copy at | ||
|  | // http://www.boost.org/LICENSE_1_0.txt | ||
|  | // | ||
|  | // See http://boostorg.github.com/compute for more information. | ||
|  | //---------------------------------------------------------------------------// | ||
|  | 
 | ||
|  | #ifndef BOOST_COMPUTE_ALGORITHM_ACCUMULATE_HPP | ||
|  | #define BOOST_COMPUTE_ALGORITHM_ACCUMULATE_HPP | ||
|  | 
 | ||
|  | #include <boost/preprocessor/seq/for_each.hpp> | ||
|  | 
 | ||
|  | #include <boost/compute/system.hpp> | ||
|  | #include <boost/compute/functional.hpp> | ||
|  | #include <boost/compute/command_queue.hpp> | ||
|  | #include <boost/compute/algorithm/reduce.hpp> | ||
|  | #include <boost/compute/algorithm/detail/serial_accumulate.hpp> | ||
|  | #include <boost/compute/container/array.hpp> | ||
|  | #include <boost/compute/container/vector.hpp> | ||
|  | #include <boost/compute/detail/iterator_range_size.hpp> | ||
|  | 
 | ||
|  | namespace boost { | ||
|  | namespace compute { | ||
|  | namespace detail { | ||
|  | 
 | ||
|  | template<class InputIterator, class T, class BinaryFunction> | ||
|  | inline T generic_accumulate(InputIterator first, | ||
|  |                             InputIterator last, | ||
|  |                             T init, | ||
|  |                             BinaryFunction function, | ||
|  |                             command_queue &queue) | ||
|  | { | ||
|  |     const context &context = queue.get_context(); | ||
|  | 
 | ||
|  |     size_t size = iterator_range_size(first, last); | ||
|  |     if(size == 0){ | ||
|  |         return init; | ||
|  |     } | ||
|  | 
 | ||
|  |     // accumulate on device | ||
|  |     array<T, 1> device_result(context); | ||
|  |     detail::serial_accumulate( | ||
|  |         first, last, device_result.begin(), init, function, queue | ||
|  |     ); | ||
|  | 
 | ||
|  |     // copy result to host | ||
|  |     T result; | ||
|  |     ::boost::compute::copy_n(device_result.begin(), 1, &result, queue); | ||
|  |     return result; | ||
|  | } | ||
|  | 
 | ||
// Returns true if accumulate() may be carried out with reduce() instead.
// That is the case when the function is commutative (such as addition of
// integers) and the initial value is the identity value for the operation
// (zero for addition, one for multiplication).
//
// This is the catch-all overload: it answers false for every combination
// of initial value and function that no more specific overload matches.
template<class T, class F>
inline bool can_accumulate_with_reduce(T /* init */, F /* function */)
{
    return false;
}
|  | 
 | ||
|  | /// \internal_ | ||
|  | #define BOOST_COMPUTE_DETAIL_DECLARE_CAN_ACCUMULATE_WITH_REDUCE(r, data, type) \ | ||
|  |     inline bool can_accumulate_with_reduce(type init, plus<type>) \ | ||
|  |     { \ | ||
|  |         return init == type(0); \ | ||
|  |     } \ | ||
|  |     inline bool can_accumulate_with_reduce(type init, multiplies<type>) \ | ||
|  |     { \ | ||
|  |         return init == type(1); \ | ||
|  |     } | ||
|  | 
 | ||
|  | BOOST_PP_SEQ_FOR_EACH( | ||
|  |     BOOST_COMPUTE_DETAIL_DECLARE_CAN_ACCUMULATE_WITH_REDUCE, | ||
|  |     _, | ||
|  |     (char_)(uchar_)(short_)(ushort_)(int_)(uint_)(long_)(ulong_) | ||
|  | ) | ||
|  | 
 | ||
|  | template<class T> | ||
|  | inline bool can_accumulate_with_reduce(T init, min<T>) | ||
|  | { | ||
|  |     return init == (std::numeric_limits<T>::max)(); | ||
|  | } | ||
|  | 
 | ||
|  | template<class T> | ||
|  | inline bool can_accumulate_with_reduce(T init, max<T>) | ||
|  | { | ||
|  |     return init == (std::numeric_limits<T>::min)(); | ||
|  | } | ||
|  | 
 | ||
|  | #undef BOOST_COMPUTE_DETAIL_DECLARE_CAN_ACCUMULATE_WITH_REDUCE | ||
|  | 
 | ||
|  | template<class InputIterator, class T, class BinaryFunction> | ||
|  | inline T dispatch_accumulate(InputIterator first, | ||
|  |                              InputIterator last, | ||
|  |                              T init, | ||
|  |                              BinaryFunction function, | ||
|  |                              command_queue &queue) | ||
|  | { | ||
|  |     size_t size = iterator_range_size(first, last); | ||
|  |     if(size == 0){ | ||
|  |         return init; | ||
|  |     } | ||
|  | 
 | ||
|  |     if(can_accumulate_with_reduce(init, function)){ | ||
|  |         T result; | ||
|  |         reduce(first, last, &result, function, queue); | ||
|  |         return result; | ||
|  |     } | ||
|  |     else { | ||
|  |         return generic_accumulate(first, last, init, function, queue); | ||
|  |     } | ||
|  | } | ||
|  | 
 | ||
|  | } // end detail namespace | ||
|  | 
 | ||
|  | /// Returns the result of applying \p function to the elements in the | ||
|  | /// range [\p first, \p last) and \p init. | ||
|  | /// | ||
|  | /// If no function is specified, \c plus will be used. | ||
|  | /// | ||
|  | /// \param first first element in the input range | ||
|  | /// \param last last element in the input range | ||
|  | /// \param init initial value | ||
|  | /// \param function binary reduction function | ||
|  | /// \param queue command queue to perform the operation | ||
|  | /// | ||
|  | /// \return the accumulated result value | ||
|  | /// | ||
|  | /// In specific situations the call to \c accumulate() can be automatically | ||
|  | /// optimized to a call to the more efficient \c reduce() algorithm. This | ||
|  | /// occurs when the binary reduction function is recognized as associative | ||
|  | /// (such as the \c plus<int> function). | ||
|  | /// | ||
|  | /// Note that because floating-point addition is not associative, calling | ||
|  | /// \c accumulate() with \c plus<float> results in a less efficient serial | ||
|  | /// reduction algorithm being executed. If a slight loss in precision is | ||
|  | /// acceptable, the more efficient parallel \c reduce() algorithm should be | ||
|  | /// used instead. | ||
|  | /// | ||
|  | /// For example: | ||
|  | /// \code | ||
|  | /// // with vec = boost::compute::vector<int> | ||
|  | /// accumulate(vec.begin(), vec.end(), 0, plus<int>());   // fast | ||
|  | /// reduce(vec.begin(), vec.end(), &result, plus<int>()); // fast | ||
|  | /// | ||
|  | /// // with vec = boost::compute::vector<float> | ||
|  | /// accumulate(vec.begin(), vec.end(), 0, plus<float>());   // slow | ||
|  | /// reduce(vec.begin(), vec.end(), &result, plus<float>()); // fast | ||
|  | /// \endcode | ||
|  | /// | ||
|  | /// \see reduce() | ||
|  | template<class InputIterator, class T, class BinaryFunction> | ||
|  | inline T accumulate(InputIterator first, | ||
|  |                     InputIterator last, | ||
|  |                     T init, | ||
|  |                     BinaryFunction function, | ||
|  |                     command_queue &queue = system::default_queue()) | ||
|  | { | ||
|  |     return detail::dispatch_accumulate(first, last, init, function, queue); | ||
|  | } | ||
|  | 
 | ||
|  | /// \overload | ||
|  | template<class InputIterator, class T> | ||
|  | inline T accumulate(InputIterator first, | ||
|  |                     InputIterator last, | ||
|  |                     T init, | ||
|  |                     command_queue &queue = system::default_queue()) | ||
|  | { | ||
|  |     typedef typename std::iterator_traits<InputIterator>::value_type IT; | ||
|  | 
 | ||
|  |     return detail::dispatch_accumulate(first, last, init, plus<IT>(), queue); | ||
|  | } | ||
|  | 
 | ||
|  | } // end compute namespace | ||
|  | } // end boost namespace | ||
|  | 
 | ||
|  | #endif // BOOST_COMPUTE_ALGORITHM_ACCUMULATE_HPP |