307 lines
		
	
	
		
			9.8 KiB
		
	
	
	
		
			Plaintext
		
	
	
	
	
	
			
		
		
	
	
			307 lines
		
	
	
		
			9.8 KiB
		
	
	
	
		
			Plaintext
		
	
	
	
	
	
| //---------------------------------------------------------------------------//
 | |
| // Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
 | |
| //
 | |
| // Distributed under the Boost Software License, Version 1.0
 | |
| // See accompanying file LICENSE_1_0.txt or copy at
 | |
| // http://www.boost.org/LICENSE_1_0.txt
 | |
| //
 | |
| // See http://boostorg.github.com/compute for more information.
 | |
| //---------------------------------------------------------------------------//
 | |
| 
 | |
| #ifndef BOOST_COMPUTE_ALGORITHM_FILL_HPP
 | |
| #define BOOST_COMPUTE_ALGORITHM_FILL_HPP
 | |
| 
 | |
#include <iterator>

#include <boost/mpl/and.hpp>
#include <boost/mpl/int.hpp>
#include <boost/mpl/vector.hpp>
#include <boost/mpl/contains.hpp>
#include <boost/utility/enable_if.hpp>
 | |
| 
 | |
| #include <boost/compute/cl.hpp>
 | |
| #include <boost/compute/system.hpp>
 | |
| #include <boost/compute/command_queue.hpp>
 | |
| #include <boost/compute/algorithm/copy.hpp>
 | |
| #include <boost/compute/async/future.hpp>
 | |
| #include <boost/compute/iterator/constant_iterator.hpp>
 | |
| #include <boost/compute/iterator/discard_iterator.hpp>
 | |
| #include <boost/compute/detail/is_buffer_iterator.hpp>
 | |
| #include <boost/compute/detail/iterator_range_size.hpp>
 | |
| 
 | |
| namespace boost {
 | |
| namespace compute {
 | |
| namespace detail {
 | |
| 
 | |
| namespace mpl = boost::mpl;
 | |
| 
 | |
| // fills the range [first, first + count) with value using copy()
 | |
| template<class BufferIterator, class T>
 | |
| inline void fill_with_copy(BufferIterator first,
 | |
|                            size_t count,
 | |
|                            const T &value,
 | |
|                            command_queue &queue)
 | |
| {
 | |
|     ::boost::compute::copy(
 | |
|         ::boost::compute::make_constant_iterator(value, 0),
 | |
|         ::boost::compute::make_constant_iterator(value, count),
 | |
|         first,
 | |
|         queue
 | |
|     );
 | |
| }
 | |
| 
 | |
| // fills the range [first, first + count) with value using copy_async()
 | |
| template<class BufferIterator, class T>
 | |
| inline future<void> fill_async_with_copy(BufferIterator first,
 | |
|                                          size_t count,
 | |
|                                          const T &value,
 | |
|                                          command_queue &queue)
 | |
| {
 | |
|     return ::boost::compute::copy_async(
 | |
|                ::boost::compute::make_constant_iterator(value, 0),
 | |
|                ::boost::compute::make_constant_iterator(value, count),
 | |
|                first,
 | |
|                queue
 | |
|            );
 | |
| }
 | |
| 
 | |
| #if defined(CL_VERSION_1_2)
 | |
| 
 | |
| // meta-function returing true if Iterator points to a range of values
 | |
| // that can be filled using clEnqueueFillBuffer(). to meet this criteria
 | |
| // it must have a buffer accessible through iter.get_buffer() and the
 | |
| // size of its value_type must by in {1, 2, 4, 8, 16, 32, 64, 128}.
 | |
| template<class Iterator>
 | |
| struct is_valid_fill_buffer_iterator :
 | |
|     public mpl::and_<
 | |
|         is_buffer_iterator<Iterator>,
 | |
|         mpl::contains<
 | |
|             mpl::vector<
 | |
|                 mpl::int_<1>,
 | |
|                 mpl::int_<2>,
 | |
|                 mpl::int_<4>,
 | |
|                 mpl::int_<8>,
 | |
|                 mpl::int_<16>,
 | |
|                 mpl::int_<32>,
 | |
|                 mpl::int_<64>,
 | |
|                 mpl::int_<128>
 | |
|             >,
 | |
|             mpl::int_<
 | |
|                 sizeof(typename std::iterator_traits<Iterator>::value_type)
 | |
|             >
 | |
|         >
 | |
|     >::type { };
 | |
| 
 | |
| template<>
 | |
| struct is_valid_fill_buffer_iterator<discard_iterator> : public boost::false_type {};
 | |
| 
 | |
| // specialization which uses clEnqueueFillBuffer for buffer iterators
 | |
| template<class BufferIterator, class T>
 | |
| inline void
 | |
| dispatch_fill(BufferIterator first,
 | |
|               size_t count,
 | |
|               const T &value,
 | |
|               command_queue &queue,
 | |
|               typename boost::enable_if<
 | |
|                  is_valid_fill_buffer_iterator<BufferIterator>
 | |
|               >::type* = 0)
 | |
| {
 | |
|     typedef typename std::iterator_traits<BufferIterator>::value_type value_type;
 | |
| 
 | |
|     if(count == 0){
 | |
|         // nothing to do
 | |
|         return;
 | |
|     }
 | |
| 
 | |
|     // check if the device supports OpenCL 1.2 (required for enqueue_fill_buffer)
 | |
|     if(!queue.check_device_version(1, 2)){
 | |
|         return fill_with_copy(first, count, value, queue);
 | |
|     }
 | |
| 
 | |
|     value_type pattern = static_cast<value_type>(value);
 | |
|     size_t offset = static_cast<size_t>(first.get_index());
 | |
| 
 | |
|     if(count == 1){
 | |
|         // use clEnqueueWriteBuffer() directly when writing a single value
 | |
|         // to the device buffer. this is potentially more efficient and also
 | |
|         // works around a bug in the intel opencl driver.
 | |
|         queue.enqueue_write_buffer(
 | |
|             first.get_buffer(),
 | |
|             offset * sizeof(value_type),
 | |
|             sizeof(value_type),
 | |
|             &pattern
 | |
|         );
 | |
|     }
 | |
|     else {
 | |
|         queue.enqueue_fill_buffer(
 | |
|             first.get_buffer(),
 | |
|             &pattern,
 | |
|             sizeof(value_type),
 | |
|             offset * sizeof(value_type),
 | |
|             count * sizeof(value_type)
 | |
|         );
 | |
|     }
 | |
| }
 | |
| 
 | |
| template<class BufferIterator, class T>
 | |
| inline future<void>
 | |
| dispatch_fill_async(BufferIterator first,
 | |
|                     size_t count,
 | |
|                     const T &value,
 | |
|                     command_queue &queue,
 | |
|                     typename boost::enable_if<
 | |
|                        is_valid_fill_buffer_iterator<BufferIterator>
 | |
|                     >::type* = 0)
 | |
| {
 | |
|     typedef typename std::iterator_traits<BufferIterator>::value_type value_type;
 | |
| 
 | |
|     // check if the device supports OpenCL 1.2 (required for enqueue_fill_buffer)
 | |
|     if(!queue.check_device_version(1, 2)){
 | |
|         return fill_async_with_copy(first, count, value, queue);
 | |
|     }
 | |
| 
 | |
|     value_type pattern = static_cast<value_type>(value);
 | |
|     size_t offset = static_cast<size_t>(first.get_index());
 | |
| 
 | |
|     event event_ =
 | |
|         queue.enqueue_fill_buffer(first.get_buffer(),
 | |
|                                   &pattern,
 | |
|                                   sizeof(value_type),
 | |
|                                   offset * sizeof(value_type),
 | |
|                                   count * sizeof(value_type));
 | |
| 
 | |
|     return future<void>(event_);
 | |
| }
 | |
| 
 | |
| #ifdef CL_VERSION_2_0
 | |
| // specializations for svm_ptr<T>
 | |
| template<class T>
 | |
| inline void dispatch_fill(svm_ptr<T> first,
 | |
|                           size_t count,
 | |
|                           const T &value,
 | |
|                           command_queue &queue)
 | |
| {
 | |
|     if(count == 0){
 | |
|         return;
 | |
|     }
 | |
| 
 | |
|     queue.enqueue_svm_fill(
 | |
|         first.get(), &value, sizeof(T), count * sizeof(T)
 | |
|     );
 | |
| }
 | |
| 
 | |
| template<class T>
 | |
| inline future<void> dispatch_fill_async(svm_ptr<T> first,
 | |
|                                         size_t count,
 | |
|                                         const T &value,
 | |
|                                         command_queue &queue)
 | |
| {
 | |
|     if(count == 0){
 | |
|         return future<void>();
 | |
|     }
 | |
| 
 | |
|     event event_ = queue.enqueue_svm_fill(
 | |
|         first.get(), &value, sizeof(T), count * sizeof(T)
 | |
|     );
 | |
| 
 | |
|     return future<void>(event_);
 | |
| }
 | |
| #endif // CL_VERSION_2_0
 | |
| 
 | |
| // default implementations
 | |
| template<class BufferIterator, class T>
 | |
| inline void
 | |
| dispatch_fill(BufferIterator first,
 | |
|               size_t count,
 | |
|               const T &value,
 | |
|               command_queue &queue,
 | |
|               typename boost::disable_if<
 | |
|                   is_valid_fill_buffer_iterator<BufferIterator>
 | |
|               >::type* = 0)
 | |
| {
 | |
|     fill_with_copy(first, count, value, queue);
 | |
| }
 | |
| 
 | |
| template<class BufferIterator, class T>
 | |
| inline future<void>
 | |
| dispatch_fill_async(BufferIterator first,
 | |
|                     size_t count,
 | |
|                     const T &value,
 | |
|                     command_queue &queue,
 | |
|                     typename boost::disable_if<
 | |
|                         is_valid_fill_buffer_iterator<BufferIterator>
 | |
|                     >::type* = 0)
 | |
| {
 | |
|     return fill_async_with_copy(first, count, value, queue);
 | |
| }
 | |
| #else
 | |
| template<class BufferIterator, class T>
 | |
| inline void dispatch_fill(BufferIterator first,
 | |
|                           size_t count,
 | |
|                           const T &value,
 | |
|                           command_queue &queue)
 | |
| {
 | |
|     fill_with_copy(first, count, value, queue);
 | |
| }
 | |
| 
 | |
| template<class BufferIterator, class T>
 | |
| inline future<void> dispatch_fill_async(BufferIterator first,
 | |
|                                         size_t count,
 | |
|                                         const T &value,
 | |
|                                         command_queue &queue)
 | |
| {
 | |
|     return fill_async_with_copy(first, count, value, queue);
 | |
| }
 | |
| #endif // !defined(CL_VERSION_1_2)
 | |
| 
 | |
| } // end detail namespace
 | |
| 
 | |
| /// Fills the range [\p first, \p last) with \p value.
 | |
| ///
 | |
| /// \param first first element in the range to fill
 | |
| /// \param last last element in the range to fill
 | |
| /// \param value value to copy to each element
 | |
| /// \param queue command queue to perform the operation
 | |
| ///
 | |
| /// For example, to fill a vector on the device with sevens:
 | |
| /// \code
 | |
| /// // vector on the device
 | |
| /// boost::compute::vector<int> vec(10, context);
 | |
| ///
 | |
| /// // fill vector with sevens
 | |
| /// boost::compute::fill(vec.begin(), vec.end(), 7, queue);
 | |
| /// \endcode
 | |
| ///
 | |
| /// \see boost::compute::fill_n()
 | |
| template<class BufferIterator, class T>
 | |
| inline void fill(BufferIterator first,
 | |
|                  BufferIterator last,
 | |
|                  const T &value,
 | |
|                  command_queue &queue = system::default_queue())
 | |
| {
 | |
|     size_t count = detail::iterator_range_size(first, last);
 | |
|     if(count == 0){
 | |
|         return;
 | |
|     }
 | |
| 
 | |
|     detail::dispatch_fill(first, count, value, queue);
 | |
| }
 | |
| 
 | |
| template<class BufferIterator, class T>
 | |
| inline future<void> fill_async(BufferIterator first,
 | |
|                                BufferIterator last,
 | |
|                                const T &value,
 | |
|                                command_queue &queue = system::default_queue())
 | |
| {
 | |
|     size_t count = detail::iterator_range_size(first, last);
 | |
|     if(count == 0){
 | |
|         return future<void>();
 | |
|     }
 | |
| 
 | |
|     return detail::dispatch_fill_async(first, count, value, queue);
 | |
| }
 | |
| 
 | |
| } // end compute namespace
 | |
| } // end boost namespace
 | |
| 
 | |
| #endif // BOOST_COMPUTE_ALGORITHM_FILL_HPP
 | 
