Bolt  1.1
C++ template library with support for OpenCL
control.h
Go to the documentation of this file.
1 /***************************************************************************
2 * Copyright 2012 - 2013 Advanced Micro Devices, Inc.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 
16 ***************************************************************************/
17 
18 
22 #if !defined( BOLT_AMP_CONTROL_H )
23 #define BOLT_AMP_CONTROL_H
24 
25 #pragma once
26 
27 #include <amp.h>
28 #include <string>
29 #include <map>
30 
31 namespace bolt
32 {
33 namespace amp
34 {
35 
77 class control {
78 public:
79  enum e_UseHostMode {
80  NoUseHost,
81  UseHost};
82  enum e_RunMode {
83  Automatic,
84  SerialCpu,
85  MultiCoreCpu,
86  Gpu };
87 
88  enum e_AutoTuneMode{NoAutoTune=0x0,
89  AutoTuneDevice=0x1,
90  AutoTuneWorkShape=0x2,
91  AutoTuneAll=0x3}; // FIXME, experimental
92  struct debug {
93  static const unsigned None=0;
94  static const unsigned Compile = 0x1;
95  static const unsigned ShowCode = 0x2;
96  static const unsigned SaveCompilerTemps = 0x4;
97  static const unsigned DebugKernelRun = 0x8;
98  static const unsigned AutoTune = 0x10;
99  };
100 
101  enum e_WaitMode {
102  BalancedWait, // Balance of Busy and Nice: tries to use Busy for short-running kernels. \todo: Balanced currently maps to nice.
103  NiceWait, // Use an OS semaphore to detect completion status.
104  BusyWait, // Busy a CPU core continuously monitoring results. Lowest-latency, but requires a dedicated core.
105  ClFinish, // Call clFinish on the queue.
106  };
107 
108 public:
109 
110  // Construct a new control structure, copying from default control for arguments that are not overridden.
111  control(
112  Concurrency::accelerator accel=getDefault().getAccelerator(),
113  e_UseHostMode useHost=getDefault().getUseHost(),
114  unsigned debug=getDefault().getDebug()
115  ):
116  m_accelerator(accel),
117  m_useHost(useHost),
118  m_forceRunMode(getDefault().m_forceRunMode),
119  m_debug(debug),
120  m_autoTune(getDefault().m_autoTune),
121  m_wgPerComputeUnit(getDefault().m_wgPerComputeUnit),
122  m_waitMode(getDefault().m_waitMode),
123  m_unroll(getDefault().m_unroll)
124  {};
125 
126  control( const control& ref) :
127  m_accelerator(ref.m_accelerator),
128  m_useHost(ref.m_useHost),
129  m_forceRunMode(ref.m_forceRunMode),
130  m_debug(ref.m_debug),
131  m_autoTune(ref.m_autoTune),
132  m_wgPerComputeUnit(ref.m_wgPerComputeUnit),
133  m_waitMode(ref.m_waitMode),
134  m_unroll(ref.m_unroll)
135  {
136  //printf("control::copy construcor\n");
137  };
138 
139  //setters:
141  void setAccelerator(::Concurrency::accelerator accel) { m_accelerator = accel; };
142 
146  void setUseHost(e_UseHostMode useHost) { m_useHost = useHost; };
147 
152  void setForceRunMode(e_RunMode forceRunMode) { m_forceRunMode = forceRunMode; };
153 
164  void setDebug(unsigned debug) { m_debug = debug; };
165 
170  void setWGPerComputeUnit(int wgPerComputeUnit) { m_wgPerComputeUnit = wgPerComputeUnit; };
171 
173  void setWaitMode(e_WaitMode waitMode) { m_waitMode = waitMode; };
174 
176  void setUnroll(int unroll) { m_unroll = unroll; };
177 
178  // getters:
179  Concurrency::accelerator& getAccelerator( ) { return m_accelerator; };
180  const Concurrency::accelerator& getAccelerator( ) const { return m_accelerator; };
181 
182  e_UseHostMode getUseHost() const { return m_useHost; };
183  e_RunMode getForceRunMode() const { return m_forceRunMode; };
184  unsigned getDebug() const { return m_debug;};
185  int const getWGPerComputeUnit() const { return m_wgPerComputeUnit; };
186  e_WaitMode getWaitMode() const { return m_waitMode; };
187  int getUnroll() const { return m_unroll; };
188 
204  static control &getDefault()
205  {
206  // Default control structure; this can be accessed by the bolt::cl::control::getDefault()
207  static control _defaultControl( true );
208  return _defaultControl;
209  };
210 
211  //TODO - implement the below function in control.cpp
212  /*static void printPlatforms( bool printDevices = true, cl_device_type deviceType = CL_DEVICE_TYPE_ALL );
213  static void printPlatformsRange( std::vector< ::cl::Platform >::iterator begin, std::vector< ::cl::Platform >::iterator end,
214  bool printDevices = true, cl_device_type deviceType = CL_DEVICE_TYPE_ALL );*/
215 
216 private:
217 
218  // This is the private constructor is only used to create the initial default control structure.
219  control(bool createGlobal) :
220  m_accelerator( Concurrency::accelerator::default_accelerator ),
221  m_useHost(UseHost),
222  m_forceRunMode(Automatic),
223  m_debug(debug::None),
224  m_autoTune(AutoTuneAll),
225  m_wgPerComputeUnit(8),
226  m_waitMode(BusyWait),
227  m_unroll(1)
228  {};
229 
230  //::cl::CommandQueue m_commandQueue;
231  ::Concurrency::accelerator m_accelerator;
232  e_UseHostMode m_useHost;
233  e_RunMode m_forceRunMode;
234  e_AutoTuneMode m_autoTune; /* auto-tune the choice of device CPU/GPU and workgroup shape */
235  unsigned m_debug;
236  int m_wgPerComputeUnit;
237  e_WaitMode m_waitMode;
238  int m_unroll;
239 };
240 };
241 };
242 
243 // Implementor note:
244 // When adding a new field to this structure, don't forget to:
245 // * Add the new field, ie "int m_foo".
246 // * Add setter function and getter function, ie "void setFoo(int fooValue)" and "int getFoo() const { return m_foo; }"
247 // * Add the field to the private constructor. This is used to set the global default "_defaultControl".
248 // * Add the field to the public constructor, copying from the _defaultControl.
249 
250 // Sample usage:
251 // Concurrency::accelerator::default_accelerator
252 // bolt::amp::control ctl(Concurrency::accelerator::default_accelerator);
253 // ctl.setDebug(bolt::amp::control::debug::Compile);
254 // bolt::amp::reduce(ctl, a.begin(), a.end(), std::plus<int>);
255 
256 
257 #endif