419 lines
		
	
	
		
			13 KiB
		
	
	
	
		
			Plaintext
		
	
	
	
	
	
		
		
			
		
	
	
			419 lines
		
	
	
		
			13 KiB
		
	
	
	
		
			Plaintext
		
	
	
	
	
	
| 
								 | 
							
								//---------------------------------------------------------------------------//
							 | 
						||
| 
								 | 
							
								// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
							 | 
						||
| 
								 | 
							
								//
							 | 
						||
| 
								 | 
							
								// Distributed under the Boost Software License, Version 1.0
							 | 
						||
| 
								 | 
							
								// See accompanying file LICENSE_1_0.txt or copy at
							 | 
						||
| 
								 | 
							
								// http://www.boost.org/LICENSE_1_0.txt
							 | 
						||
| 
								 | 
							
								//
							 | 
						||
| 
								 | 
							
								// See http://boostorg.github.com/compute for more information.
							 | 
						||
| 
								 | 
							
								//---------------------------------------------------------------------------//
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								#ifndef BOOST_COMPUTE_KERNEL_HPP
							 | 
						||
| 
								 | 
							
								#define BOOST_COMPUTE_KERNEL_HPP
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								#include <string>
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								#include <boost/assert.hpp>
							 | 
						||
| 
								 | 
							
								#include <boost/utility/enable_if.hpp>
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								#include <boost/compute/config.hpp>
							 | 
						||
| 
								 | 
							
								#include <boost/compute/program.hpp>
							 | 
						||
| 
								 | 
							
								#include <boost/compute/exception.hpp>
							 | 
						||
| 
								 | 
							
								#include <boost/compute/type_traits/is_fundamental.hpp>
							 | 
						||
| 
								 | 
							
								#include <boost/compute/detail/get_object_info.hpp>
							 | 
						||
| 
								 | 
							
								#include <boost/compute/detail/assert_cl_success.hpp>
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								namespace boost {
							 | 
						||
| 
								 | 
							
								namespace compute {
							 | 
						||
| 
								 | 
							
								namespace detail {
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								// Forward declaration of the functor that kernel::set_arg() invokes to
								// bind a value of type T; the definition follows the kernel class.
								template<typename T> struct set_kernel_arg;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								} // end detail namespace
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								/// \class kernel
							 | 
						||
| 
								 | 
							
								/// \brief A compute kernel.
							 | 
						||
| 
								 | 
							
								///
							 | 
						||
| 
								 | 
							
								/// \see command_queue, program
							 | 
						||
| 
								 | 
							
								class kernel
							 | 
						||
| 
								 | 
							
								{
							 | 
						||
| 
								 | 
							
								public:
							 | 
						||
| 
								 | 
							
								    /// Creates a null kernel object.
							 | 
						||
| 
								 | 
							
								    kernel()
							 | 
						||
| 
								 | 
							
								        : m_kernel(0)
							 | 
						||
| 
								 | 
							
								    {
							 | 
						||
| 
								 | 
							
								    }
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    /// Creates a new kernel object for \p kernel. If \p retain is
							 | 
						||
| 
								 | 
							
								    /// \c true, the reference count for \p kernel will be incremented.
							 | 
						||
| 
								 | 
							
								    explicit kernel(cl_kernel kernel, bool retain = true)
							 | 
						||
| 
								 | 
							
								        : m_kernel(kernel)
							 | 
						||
| 
								 | 
							
								    {
							 | 
						||
| 
								 | 
							
								        if(m_kernel && retain){
							 | 
						||
| 
								 | 
							
								            clRetainKernel(m_kernel);
							 | 
						||
| 
								 | 
							
								        }
							 | 
						||
| 
								 | 
							
								    }
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    /// Creates a new kernel object with \p name from \p program.
							 | 
						||
| 
								 | 
							
								    kernel(const program &program, const std::string &name)
							 | 
						||
| 
								 | 
							
								    {
							 | 
						||
| 
								 | 
							
								        cl_int error = 0;
							 | 
						||
| 
								 | 
							
								        m_kernel = clCreateKernel(program.get(), name.c_str(), &error);
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								        if(!m_kernel){
							 | 
						||
| 
								 | 
							
								            BOOST_THROW_EXCEPTION(opencl_error(error));
							 | 
						||
| 
								 | 
							
								        }
							 | 
						||
| 
								 | 
							
								    }
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    /// Creates a new kernel object as a copy of \p other.
							 | 
						||
| 
								 | 
							
								    kernel(const kernel &other)
							 | 
						||
| 
								 | 
							
								        : m_kernel(other.m_kernel)
							 | 
						||
| 
								 | 
							
								    {
							 | 
						||
| 
								 | 
							
								        if(m_kernel){
							 | 
						||
| 
								 | 
							
								            clRetainKernel(m_kernel);
							 | 
						||
| 
								 | 
							
								        }
							 | 
						||
| 
								 | 
							
								    }
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    /// Copies the kernel object from \p other to \c *this.
							 | 
						||
| 
								 | 
							
								    kernel& operator=(const kernel &other)
							 | 
						||
| 
								 | 
							
								    {
							 | 
						||
| 
								 | 
							
								        if(this != &other){
							 | 
						||
| 
								 | 
							
								            if(m_kernel){
							 | 
						||
| 
								 | 
							
								                clReleaseKernel(m_kernel);
							 | 
						||
| 
								 | 
							
								            }
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								            m_kernel = other.m_kernel;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								            if(m_kernel){
							 | 
						||
| 
								 | 
							
								                clRetainKernel(m_kernel);
							 | 
						||
| 
								 | 
							
								            }
							 | 
						||
| 
								 | 
							
								        }
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								        return *this;
							 | 
						||
| 
								 | 
							
								    }
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    #ifndef BOOST_COMPUTE_NO_RVALUE_REFERENCES
							 | 
						||
| 
								 | 
							
								    /// Move-constructs a new kernel object from \p other.
							 | 
						||
| 
								 | 
							
								    kernel(kernel&& other) BOOST_NOEXCEPT
							 | 
						||
| 
								 | 
							
								        : m_kernel(other.m_kernel)
							 | 
						||
| 
								 | 
							
								    {
							 | 
						||
| 
								 | 
							
								        other.m_kernel = 0;
							 | 
						||
| 
								 | 
							
								    }
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    /// Move-assigns the kernel from \p other to \c *this.
							 | 
						||
| 
								 | 
							
								    kernel& operator=(kernel&& other) BOOST_NOEXCEPT
							 | 
						||
| 
								 | 
							
								    {
							 | 
						||
| 
								 | 
							
								        if(m_kernel){
							 | 
						||
| 
								 | 
							
								            clReleaseKernel(m_kernel);
							 | 
						||
| 
								 | 
							
								        }
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								        m_kernel = other.m_kernel;
							 | 
						||
| 
								 | 
							
								        other.m_kernel = 0;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								        return *this;
							 | 
						||
| 
								 | 
							
								    }
							 | 
						||
| 
								 | 
							
								    #endif // BOOST_COMPUTE_NO_RVALUE_REFERENCES
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    /// Destroys the kernel object.
							 | 
						||
| 
								 | 
							
								    ~kernel()
							 | 
						||
| 
								 | 
							
								    {
							 | 
						||
| 
								 | 
							
								        if(m_kernel){
							 | 
						||
| 
								 | 
							
								            BOOST_COMPUTE_ASSERT_CL_SUCCESS(
							 | 
						||
| 
								 | 
							
								                clReleaseKernel(m_kernel)
							 | 
						||
| 
								 | 
							
								            );
							 | 
						||
| 
								 | 
							
								        }
							 | 
						||
| 
								 | 
							
								    }
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    /// Returns a reference to the underlying OpenCL kernel object.
							 | 
						||
| 
								 | 
							
								    cl_kernel& get() const
							 | 
						||
| 
								 | 
							
								    {
							 | 
						||
| 
								 | 
							
								        return const_cast<cl_kernel &>(m_kernel);
							 | 
						||
| 
								 | 
							
								    }
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    /// Returns the function name for the kernel.
							 | 
						||
| 
								 | 
							
								    std::string name() const
							 | 
						||
| 
								 | 
							
								    {
							 | 
						||
| 
								 | 
							
								        return get_info<std::string>(CL_KERNEL_FUNCTION_NAME);
							 | 
						||
| 
								 | 
							
								    }
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    /// Returns the number of arguments for the kernel.
							 | 
						||
| 
								 | 
							
								    size_t arity() const
							 | 
						||
| 
								 | 
							
								    {
							 | 
						||
| 
								 | 
							
								        return get_info<cl_uint>(CL_KERNEL_NUM_ARGS);
							 | 
						||
| 
								 | 
							
								    }
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    /// Returns the program for the kernel.
							 | 
						||
| 
								 | 
							
								    program get_program() const
							 | 
						||
| 
								 | 
							
								    {
							 | 
						||
| 
								 | 
							
								        return program(get_info<cl_program>(CL_KERNEL_PROGRAM));
							 | 
						||
| 
								 | 
							
								    }
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    /// Returns the context for the kernel.
							 | 
						||
| 
								 | 
							
								    context get_context() const
							 | 
						||
| 
								 | 
							
								    {
							 | 
						||
| 
								 | 
							
								        return context(get_info<cl_context>(CL_KERNEL_CONTEXT));
							 | 
						||
| 
								 | 
							
								    }
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    /// Returns information about the kernel.
							 | 
						||
| 
								 | 
							
								    ///
							 | 
						||
| 
								 | 
							
								    /// \see_opencl_ref{clGetKernelInfo}
							 | 
						||
| 
								 | 
							
								    template<class T>
							 | 
						||
| 
								 | 
							
								    T get_info(cl_kernel_info info) const
							 | 
						||
| 
								 | 
							
								    {
							 | 
						||
| 
								 | 
							
								        return detail::get_object_info<T>(clGetKernelInfo, m_kernel, info);
							 | 
						||
| 
								 | 
							
								    }
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    /// \overload
							 | 
						||
| 
								 | 
							
								    template<int Enum>
							 | 
						||
| 
								 | 
							
								    typename detail::get_object_info_type<kernel, Enum>::type
							 | 
						||
| 
								 | 
							
								    get_info() const;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    #if defined(CL_VERSION_1_2) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED)
							 | 
						||
| 
								 | 
							
								    /// Returns information about the argument at \p index.
							 | 
						||
| 
								 | 
							
								    ///
							 | 
						||
| 
								 | 
							
								    /// For example, to get the name of the first argument:
							 | 
						||
| 
								 | 
							
								    /// \code
							 | 
						||
| 
								 | 
							
								    /// std::string arg = kernel.get_arg_info<std::string>(0, CL_KERNEL_ARG_NAME);
							 | 
						||
| 
								 | 
							
								    /// \endcode
							 | 
						||
| 
								 | 
							
								    ///
							 | 
						||
| 
								 | 
							
								    /// Note, this function requires that the program be compiled with the
							 | 
						||
| 
								 | 
							
								    /// \c "-cl-kernel-arg-info" flag. For example:
							 | 
						||
| 
								 | 
							
								    /// \code
							 | 
						||
| 
								 | 
							
								    /// program.build("-cl-kernel-arg-info");
							 | 
						||
| 
								 | 
							
								    /// \endcode
							 | 
						||
| 
								 | 
							
								    ///
							 | 
						||
| 
								 | 
							
								    /// \opencl_version_warning{1,2}
							 | 
						||
| 
								 | 
							
								    ///
							 | 
						||
| 
								 | 
							
								    /// \see_opencl_ref{clGetKernelArgInfo}
							 | 
						||
| 
								 | 
							
								    template<class T>
							 | 
						||
| 
								 | 
							
								    T get_arg_info(size_t index, cl_kernel_arg_info info) const
							 | 
						||
| 
								 | 
							
								    {
							 | 
						||
| 
								 | 
							
								        return detail::get_object_info<T>(
							 | 
						||
| 
								 | 
							
								            clGetKernelArgInfo, m_kernel, info, static_cast<cl_uint>(index)
							 | 
						||
| 
								 | 
							
								        );
							 | 
						||
| 
								 | 
							
								    }
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    /// \overload
							 | 
						||
| 
								 | 
							
								    template<int Enum>
							 | 
						||
| 
								 | 
							
								    typename detail::get_object_info_type<kernel, Enum>::type
							 | 
						||
| 
								 | 
							
								    get_arg_info(size_t index) const;
							 | 
						||
| 
								 | 
							
								    #endif // CL_VERSION_1_2
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    /// Returns work-group information for the kernel with \p device.
							 | 
						||
| 
								 | 
							
								    ///
							 | 
						||
| 
								 | 
							
								    /// \see_opencl_ref{clGetKernelWorkGroupInfo}
							 | 
						||
| 
								 | 
							
								    template<class T>
							 | 
						||
| 
								 | 
							
								    T get_work_group_info(const device &device, cl_kernel_work_group_info info) const
							 | 
						||
| 
								 | 
							
								    {
							 | 
						||
| 
								 | 
							
								        return detail::get_object_info<T>(clGetKernelWorkGroupInfo, m_kernel, info, device.id());
							 | 
						||
| 
								 | 
							
								    }
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    /// Sets the argument at \p index to \p value with \p size.
							 | 
						||
| 
								 | 
							
								    ///
							 | 
						||
| 
								 | 
							
								    /// \see_opencl_ref{clSetKernelArg}
							 | 
						||
| 
								 | 
							
								    void set_arg(size_t index, size_t size, const void *value)
							 | 
						||
| 
								 | 
							
								    {
							 | 
						||
| 
								 | 
							
								        BOOST_ASSERT(index < arity());
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								        cl_int ret = clSetKernelArg(m_kernel,
							 | 
						||
| 
								 | 
							
								                                    static_cast<cl_uint>(index),
							 | 
						||
| 
								 | 
							
								                                    size,
							 | 
						||
| 
								 | 
							
								                                    value);
							 | 
						||
| 
								 | 
							
								        if(ret != CL_SUCCESS){
							 | 
						||
| 
								 | 
							
								            BOOST_THROW_EXCEPTION(opencl_error(ret));
							 | 
						||
| 
								 | 
							
								        }
							 | 
						||
| 
								 | 
							
								    }
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    /// Sets the argument at \p index to \p value.
							 | 
						||
| 
								 | 
							
								    ///
							 | 
						||
| 
								 | 
							
								    /// For built-in types (e.g. \c float, \c int4_), this is equivalent to
							 | 
						||
| 
								 | 
							
								    /// calling set_arg(index, sizeof(type), &value).
							 | 
						||
| 
								 | 
							
								    ///
							 | 
						||
| 
								 | 
							
								    /// Additionally, this method is specialized for device memory objects
							 | 
						||
| 
								 | 
							
								    /// such as buffer and image2d. This allows for them to be passed directly
							 | 
						||
| 
								 | 
							
								    /// without having to extract their underlying cl_mem object.
							 | 
						||
| 
								 | 
							
								    ///
							 | 
						||
| 
								 | 
							
								    /// This method is also specialized for device container types such as
							 | 
						||
| 
								 | 
							
								    /// vector<T> and array<T, N>. This allows for them to be passed directly
							 | 
						||
| 
								 | 
							
								    /// as kernel arguments without having to extract their underlying buffer.
							 | 
						||
| 
								 | 
							
								    ///
							 | 
						||
| 
								 | 
							
								    /// For setting local memory arguments (e.g. "__local float *buf"), the
							 | 
						||
| 
								 | 
							
								    /// local_buffer<T> class may be used:
							 | 
						||
| 
								 | 
							
								    /// \code
							 | 
						||
| 
								 | 
							
								    /// // set argument to a local buffer with storage for 32 float's
							 | 
						||
| 
								 | 
							
								    /// kernel.set_arg(0, local_buffer<float>(32));
							 | 
						||
| 
								 | 
							
								    /// \endcode
							 | 
						||
| 
								 | 
							
								    template<class T>
							 | 
						||
| 
								 | 
							
								    void set_arg(size_t index, const T &value)
							 | 
						||
| 
								 | 
							
								    {
							 | 
						||
| 
								 | 
							
								        // if you get a compilation error pointing here it means you
							 | 
						||
| 
								 | 
							
								        // attempted to set a kernel argument from an invalid type.
							 | 
						||
| 
								 | 
							
								        detail::set_kernel_arg<T>()(*this, index, value);
							 | 
						||
| 
								 | 
							
								    }
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    /// \internal_
							 | 
						||
| 
								 | 
							
								    void set_arg(size_t index, const cl_mem mem)
							 | 
						||
| 
								 | 
							
								    {
							 | 
						||
| 
								 | 
							
								        set_arg(index, sizeof(cl_mem), static_cast<const void *>(&mem));
							 | 
						||
| 
								 | 
							
								    }
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    /// \internal_
							 | 
						||
| 
								 | 
							
								    void set_arg(size_t index, const cl_sampler sampler)
							 | 
						||
| 
								 | 
							
								    {
							 | 
						||
| 
								 | 
							
								        set_arg(index, sizeof(cl_sampler), static_cast<const void *>(&sampler));
							 | 
						||
| 
								 | 
							
								    }
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    /// \internal_
							 | 
						||
| 
								 | 
							
								    void set_arg_svm_ptr(size_t index, void* ptr)
							 | 
						||
| 
								 | 
							
								    {
							 | 
						||
| 
								 | 
							
								        #ifdef CL_VERSION_2_0
							 | 
						||
| 
								 | 
							
								        cl_int ret = clSetKernelArgSVMPointer(m_kernel, static_cast<cl_uint>(index), ptr);
							 | 
						||
| 
								 | 
							
								        if(ret != CL_SUCCESS){
							 | 
						||
| 
								 | 
							
								            BOOST_THROW_EXCEPTION(opencl_error(ret));
							 | 
						||
| 
								 | 
							
								        }
							 | 
						||
| 
								 | 
							
								        #else
							 | 
						||
| 
								 | 
							
								        (void) index;
							 | 
						||
| 
								 | 
							
								        (void) ptr;
							 | 
						||
| 
								 | 
							
								        BOOST_THROW_EXCEPTION(opencl_error(CL_INVALID_ARG_VALUE));
							 | 
						||
| 
								 | 
							
								        #endif
							 | 
						||
| 
								 | 
							
								    }
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    #ifndef BOOST_COMPUTE_NO_VARIADIC_TEMPLATES
							 | 
						||
| 
								 | 
							
								    /// Sets the arguments for the kernel to \p args.
							 | 
						||
| 
								 | 
							
								    template<class... T>
							 | 
						||
| 
								 | 
							
								    void set_args(T&&... args)
							 | 
						||
| 
								 | 
							
								    {
							 | 
						||
| 
								 | 
							
								        BOOST_ASSERT(sizeof...(T) <= arity());
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								        _set_args<0>(args...);
							 | 
						||
| 
								 | 
							
								    }
							 | 
						||
| 
								 | 
							
								    #endif // BOOST_COMPUTE_NO_VARIADIC_TEMPLATES
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    #if defined(CL_VERSION_2_0) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED)
							 | 
						||
| 
								 | 
							
								    /// Sets additional execution information for the kernel.
							 | 
						||
| 
								 | 
							
								    ///
							 | 
						||
| 
								 | 
							
								    /// \opencl_version_warning{2,0}
							 | 
						||
| 
								 | 
							
								    ///
							 | 
						||
| 
								 | 
							
								    /// \see_opencl2_ref{clSetKernelExecInfo}
							 | 
						||
| 
								 | 
							
								    void set_exec_info(cl_kernel_exec_info info, size_t size, const void *value)
							 | 
						||
| 
								 | 
							
								    {
							 | 
						||
| 
								 | 
							
								        cl_int ret = clSetKernelExecInfo(m_kernel, info, size, value);
							 | 
						||
| 
								 | 
							
								        if(ret != CL_SUCCESS){
							 | 
						||
| 
								 | 
							
								            BOOST_THROW_EXCEPTION(opencl_error(ret));
							 | 
						||
| 
								 | 
							
								        }
							 | 
						||
| 
								 | 
							
								    }
							 | 
						||
| 
								 | 
							
								    #endif // CL_VERSION_2_0
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    /// Returns \c true if the kernel is the same at \p other.
							 | 
						||
| 
								 | 
							
								    bool operator==(const kernel &other) const
							 | 
						||
| 
								 | 
							
								    {
							 | 
						||
| 
								 | 
							
								        return m_kernel == other.m_kernel;
							 | 
						||
| 
								 | 
							
								    }
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    /// Returns \c true if the kernel is different from \p other.
							 | 
						||
| 
								 | 
							
								    bool operator!=(const kernel &other) const
							 | 
						||
| 
								 | 
							
								    {
							 | 
						||
| 
								 | 
							
								        return m_kernel != other.m_kernel;
							 | 
						||
| 
								 | 
							
								    }
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    /// \internal_
							 | 
						||
| 
								 | 
							
								    operator cl_kernel() const
							 | 
						||
| 
								 | 
							
								    {
							 | 
						||
| 
								 | 
							
								        return m_kernel;
							 | 
						||
| 
								 | 
							
								    }
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    /// \internal_
							 | 
						||
| 
								 | 
							
								    static kernel create_with_source(const std::string &source,
							 | 
						||
| 
								 | 
							
								                                     const std::string &name,
							 | 
						||
| 
								 | 
							
								                                     const context &context)
							 | 
						||
| 
								 | 
							
								    {
							 | 
						||
| 
								 | 
							
								        return program::build_with_source(source, context).create_kernel(name);
							 | 
						||
| 
								 | 
							
								    }
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								private:
							 | 
						||
| 
								 | 
							
								    #ifndef BOOST_COMPUTE_NO_VARIADIC_TEMPLATES
							 | 
						||
| 
								 | 
							
								    /// \internal_
							 | 
						||
| 
								 | 
							
								    template<size_t N>
							 | 
						||
| 
								 | 
							
								    void _set_args()
							 | 
						||
| 
								 | 
							
								    {
							 | 
						||
| 
								 | 
							
								    }
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    /// \internal_
							 | 
						||
| 
								 | 
							
								    template<size_t N, class T, class... Args>
							 | 
						||
| 
								 | 
							
								    void _set_args(T&& arg, Args&&... rest)
							 | 
						||
| 
								 | 
							
								    {
							 | 
						||
| 
								 | 
							
								        set_arg(N, arg);
							 | 
						||
| 
								 | 
							
								        _set_args<N+1>(rest...);
							 | 
						||
| 
								 | 
							
								    }
							 | 
						||
| 
								 | 
							
								    #endif // BOOST_COMPUTE_NO_VARIADIC_TEMPLATES
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								private:
							 | 
						||
| 
								 | 
							
								    cl_kernel m_kernel;
							 | 
						||
| 
								 | 
							
								};
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								/// Creates the kernel called \p name from this program. Defined here
								/// rather than alongside program because it needs the complete kernel
								/// type.
								inline kernel program::create_kernel(const std::string &name) const
								{
								    kernel created(*this, name);
								    return created;
								}
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								/// \internal_ define get_info() specializations for kernel
							 | 
						||
| 
								 | 
							
								BOOST_COMPUTE_DETAIL_DEFINE_GET_INFO_SPECIALIZATIONS(kernel,
							 | 
						||
| 
								 | 
							
								    ((std::string, CL_KERNEL_FUNCTION_NAME))
							 | 
						||
| 
								 | 
							
								    ((cl_uint, CL_KERNEL_NUM_ARGS))
							 | 
						||
| 
								 | 
							
								    ((cl_uint, CL_KERNEL_REFERENCE_COUNT))
							 | 
						||
| 
								 | 
							
								    ((cl_context, CL_KERNEL_CONTEXT))
							 | 
						||
| 
								 | 
							
								    ((cl_program, CL_KERNEL_PROGRAM))
							 | 
						||
| 
								 | 
							
								)
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								#ifdef CL_VERSION_1_2
							 | 
						||
| 
								 | 
							
								BOOST_COMPUTE_DETAIL_DEFINE_GET_INFO_SPECIALIZATIONS(kernel,
							 | 
						||
| 
								 | 
							
								    ((std::string, CL_KERNEL_ATTRIBUTES))
							 | 
						||
| 
								 | 
							
								)
							 | 
						||
| 
								 | 
							
								#endif // CL_VERSION_1_2
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								/// \internal_ get_arg_info<Enum>() specializations for kernel
								#if defined(CL_VERSION_1_2)
								// For one (type, enum) pair this macro
								//  - records the result type in detail::get_object_info_type<kernel, enum>
								//  - specializes kernel::get_arg_info<enum>() to forward to the typed
								//    get_arg_info<type>(index, enum) overload
								#define BOOST_COMPUTE_DETAIL_DEFINE_KERNEL_GET_ARG_INFO_SPECIALIZATION(info_type, info_enum) \
								    namespace detail { \
								        template<> \
								        struct get_object_info_type<kernel, info_enum> \
								        { \
								            typedef info_type type; \
								        }; \
								    } \
								    template<> \
								    inline info_type kernel::get_arg_info<info_enum>(size_t index) const \
								    { \
								        return get_arg_info<info_type>(index, info_enum); \
								    }

								BOOST_COMPUTE_DETAIL_DEFINE_KERNEL_GET_ARG_INFO_SPECIALIZATION(
								    cl_kernel_arg_address_qualifier, CL_KERNEL_ARG_ADDRESS_QUALIFIER
								)
								BOOST_COMPUTE_DETAIL_DEFINE_KERNEL_GET_ARG_INFO_SPECIALIZATION(
								    cl_kernel_arg_access_qualifier, CL_KERNEL_ARG_ACCESS_QUALIFIER
								)
								BOOST_COMPUTE_DETAIL_DEFINE_KERNEL_GET_ARG_INFO_SPECIALIZATION(
								    std::string, CL_KERNEL_ARG_TYPE_NAME
								)
								BOOST_COMPUTE_DETAIL_DEFINE_KERNEL_GET_ARG_INFO_SPECIALIZATION(
								    cl_kernel_arg_type_qualifier, CL_KERNEL_ARG_TYPE_QUALIFIER
								)
								BOOST_COMPUTE_DETAIL_DEFINE_KERNEL_GET_ARG_INFO_SPECIALIZATION(
								    std::string, CL_KERNEL_ARG_NAME
								)
								#endif // CL_VERSION_1_2
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								namespace detail {
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								// set_kernel_arg implementation for built-in types
							 | 
						||
| 
								 | 
							
								template<class T>
							 | 
						||
| 
								 | 
							
								struct set_kernel_arg
							 | 
						||
| 
								 | 
							
								{
							 | 
						||
| 
								 | 
							
								    typename boost::enable_if<is_fundamental<T> >::type
							 | 
						||
| 
								 | 
							
								    operator()(kernel &kernel_, size_t index, const T &value)
							 | 
						||
| 
								 | 
							
								    {
							 | 
						||
| 
								 | 
							
								        kernel_.set_arg(index, sizeof(T), &value);
							 | 
						||
| 
								 | 
							
								    }
							 | 
						||
| 
								 | 
							
								};
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								// set_kernel_arg specialization for char (different from built-in cl_char)
							 | 
						||
| 
								 | 
							
								template<>
							 | 
						||
| 
								 | 
							
								struct set_kernel_arg<char>
							 | 
						||
| 
								 | 
							
								{
							 | 
						||
| 
								 | 
							
								    void operator()(kernel &kernel_, size_t index, const char c)
							 | 
						||
| 
								 | 
							
								    {
							 | 
						||
| 
								 | 
							
								        kernel_.set_arg(index, sizeof(char), &c);
							 | 
						||
| 
								 | 
							
								    }
							 | 
						||
| 
								 | 
							
								};
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								} // end detail namespace
							 | 
						||
| 
								 | 
							
								} // end namespace compute
							 | 
						||
| 
								 | 
							
								} // end namespace boost
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								#endif // BOOST_COMPUTE_KERNEL_HPP
							 |