Bullet Collision Detection & Physics Library
btThreadSupportWin32.cpp
Go to the documentation of this file.
1/*
2Bullet Continuous Collision Detection and Physics Library
3Copyright (c) 2003-2018 Erwin Coumans http://bulletphysics.com
4
5This software is provided 'as-is', without any express or implied warranty.
6In no event will the authors be held liable for any damages arising from the use of this software.
7Permission is granted to anyone to use this software for any purpose,
8including commercial applications, and to alter it and redistribute it freely,
9subject to the following restrictions:
10
111. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
122. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
133. This notice may not be removed or altered from any source distribution.
14*/
15
16#if defined(_WIN32) && BT_THREADSAFE
17
18#include "LinearMath/btScalar.h"
19#include "LinearMath/btMinMax.h"
23#include <windows.h>
24#include <stdio.h>
25
// Summary of the host's processor topology: simple counters plus a list of
// "team" masks (groups of processors that share an L3 cache).
// NOTE(review): code elsewhere in this file also uses numLogicalProcessors,
// numPhysicalPackages and processorTeamMasks[] on this struct; their
// declarations are not present in this listing.
26struct btProcessorInfo
27{
// incremented together with numLogicalProcessors while walking the processor records
29 int numCores;
// one per RelationNumaNode record
30 int numNumaNodes;
// one per RelationCache record whose Cache.Level is 1, 2 or 3 respectively
31 int numL1Cache;
32 int numL2Cache;
33 int numL3Cache;
// upper bound on how many team masks are recorded; further L3 records are counted but not stored
35 static const int maxNumTeamMasks = 32;
// number of valid entries in processorTeamMasks (never exceeds maxNumTeamMasks)
36 int numTeamMasks;
38};
39
// Scans the recorded team masks in order and returns the first one that has
// at least one bit in common with procMask; returns 0 when no team matches.
// NOTE(review): the function signature and the statement that defines
// procMask are not present in this listing.
41{
43 for (int i = 0; i < procInfo.numTeamMasks; ++i)
44 {
45 if (procMask & procInfo.processorTeamMasks[i])
46 {
47 return procInfo.processorTeamMasks[i];
48 }
49 }
// no team contains any of the requested processors
50 return 0;
51}
52
// Same scan as the mask lookup, but returns the index of the first team mask
// that overlaps procMask; returns -1 when no team matches.
// NOTE(review): the function signature and the statement that defines
// procMask are not present in this listing.
54{
56 for (int i = 0; i < procInfo.numTeamMasks; ++i)
57 {
58 if (procMask & procInfo.processorTeamMasks[i])
59 {
60 return i;
61 }
62 }
// no team contains any of the requested processors
63 return -1;
64}
65
// Counts how many bits are set in `bits`: tests the lowest bit, then shifts
// right, until the remaining value is zero. Returns 0 for an input of 0.
// NOTE(review): the function signature (and therefore the exact type of
// `bits`) is not present in this listing.
67{
68 int count = 0;
69 while (bits)
70 {
71 if (bits & 1)
72 {
73 count++;
74 }
// drop the bit just examined
75 bits >>= 1;
76 }
77 return count;
78}
79
81
// Fills *procInfo from the operating system's logical-processor records.
// The struct is zeroed first, so an early return leaves every counter at 0.
// NOTE(review): the function signature, the query loop's calls, the
// declarations of `buf` and `info`, and two case labels are not present in
// this listing; the comments below describe only the visible statements.
83{
84 memset(procInfo, 0, sizeof(*procInfo));
// the API is looked up at run time from kernel32 rather than linked directly
86 (Pfn_GetLogicalProcessorInformation)GetProcAddress(GetModuleHandle(TEXT("kernel32")), "GetLogicalProcessorInformation");
88 {
89 // no info
90 return;
91 }
93 DWORD bufSize = 0;
// retry loop: exits via break on one branch, otherwise frees any previous buffer
// (the conditions and the re-allocation are not visible here)
94 while (true)
95 {
97 {
98 break;
99 }
100 else
101 {
103 {
104 if (buf)
105 {
106 free(buf);
107 }
109 }
110 }
111 }
112
// number of records = byte size reported in bufSize divided by the size of one element
113 int len = bufSize / sizeof(*buf);
114 for (int i = 0; i < len; ++i)
115 {
117 switch (info->Relationship)
118 {
119 case RelationNumaNode:
120 procInfo->numNumaNodes++;
121 break;
122
// (case label not visible) each such record adds one core and as many logical
// processors as there are bits set in its ProcessorMask
124 procInfo->numCores++;
125 procInfo->numLogicalProcessors += countSetBits(info->ProcessorMask);
126 break;
127
128 case RelationCache:
129 if (info->Cache.Level == 1)
130 {
131 procInfo->numL1Cache++;
132 }
133 else if (info->Cache.Level == 2)
134 {
135 procInfo->numL2Cache++;
136 }
137 else if (info->Cache.Level == 3)
138 {
139 procInfo->numL3Cache++;
140 // processors that share L3 cache are considered to be on the same team
141 // because they can more easily work together on the same data.
142 // Large performance penalties will occur if 2 or more threads from different
143 // teams attempt to frequently read and modify the same cache lines.
144 //
145 // On the AMD Ryzen 7 CPU for example, the 8 cores on the CPU are split into
146 // 2 CCX units of 4 cores each. Each CCX has a separate L3 cache, so if both
147 // CCXs are operating on the same data, many cycles will be spent keeping the
148 // two caches coherent.
// masks beyond maxNumTeamMasks are silently dropped (numL3Cache still counts them)
149 if (procInfo->numTeamMasks < btProcessorInfo::maxNumTeamMasks)
150 {
151 procInfo->processorTeamMasks[procInfo->numTeamMasks] = info->ProcessorMask;
152 procInfo->numTeamMasks++;
153 }
154 }
155 break;
156
// (case label not visible)
158 procInfo->numPhysicalPackages++;
159 break;
160 }
161 }
162 free(buf);
163}
164
// Win32 thread-support class: owns a set of worker threads, each driven by a
// pair of events (start / complete), and exposes the task-running interface
// declared with BT_OVERRIDE below.
// NOTE(review): the class header and the declarations of several members used
// by the method definitions (m_activeThreadStatus, m_completeHandles,
// m_startedThreadMask, m_processorInfo, and the event-complete fields of
// btThreadStatus) are not present in this listing.
167{
168public:
// Per-worker bookkeeping shared between the controlling thread and the worker.
169 struct btThreadStatus
170 {
171 int m_taskId;
172 int m_commandId;
// 0 = idle, 1 = task submitted by runTask, 2 = task finished, 3 = worker exiting
173 int m_status;
174
175 ThreadFunc m_userThreadFunc;
176 void* m_userPtr; //for taskDesc etc
177
178 void* m_threadHandle; //this one is calling 'Win32ThreadFunc'
179
// signalled to wake the worker; the name buffer receives the generated event name
180 void* m_eventStartHandle;
181 char m_eventStartHandleName[32];
182
185 };
186
187private:
190 int m_numThreads;
193
194 void startThreads(const ConstructionInfo& threadInfo);
195 void stopThreads();
196 int waitForResponse();
197
198public:
199 btThreadSupportWin32(const ConstructionInfo& threadConstructionInfo);
200 virtual ~btThreadSupportWin32();
201
// number of worker threads (startThreads computes it as the capped logical processor count minus one)
202 virtual int getNumWorkerThreads() const BT_OVERRIDE { return m_numThreads; }
// number of processors in the first recorded team mask (0 if no team was recorded)
203 virtual int getCacheFriendlyNumThreads() const BT_OVERRIDE { return countSetBits(m_processorInfo.processorTeamMasks[0]); }
// NOTE(review): integer division by numCores; if m_processorInfo was left zeroed
// (processor query unavailable) this divides by zero - confirm callers guard it
204 virtual int getLogicalToPhysicalCoreRatio() const BT_OVERRIDE { return m_processorInfo.numLogicalProcessors / m_processorInfo.numCores; }
205
206 virtual void runTask(int threadIndex, void* userData) BT_OVERRIDE;
207 virtual void waitForAllTasks() BT_OVERRIDE;
208
209 virtual btCriticalSection* createCriticalSection() BT_OVERRIDE;
210 virtual void deleteCriticalSection(btCriticalSection* criticalSection) BT_OVERRIDE;
211};
212
214{
216}
217
// Destructor: shuts down every worker thread and releases its handles via stopThreads().
218btThreadSupportWin32::~btThreadSupportWin32()
219{
220 stopThreads();
221}
222
// Worker thread entry point. lpParam is the worker's btThreadStatus.
// The thread sleeps on the start event; when woken it either runs the user
// function on m_userPtr (non-null) or exits (null m_userPtr), and in both
// cases signals the complete event afterwards. Always returns 0.
// NOTE(review): the function signature is not present in this listing.
224{
225 btThreadSupportWin32::btThreadStatus* status = (btThreadSupportWin32::btThreadStatus*)lpParam;
226
227 while (1)
228 {
// block until the controlling thread signals work (or shutdown)
229 WaitForSingleObject(status->m_eventStartHandle, INFINITE);
230 void* userPtr = status->m_userPtr;
231
232 if (userPtr)
233 {
// a task must have been marked as submitted (non-zero status) before the wake-up
234 btAssert(status->m_status);
235 status->m_userThreadFunc(userPtr);
// 2 = task finished
236 status->m_status = 2;
237 SetEvent(status->m_eventCompleteHandle);
238 }
239 else
240 {
241 //exit Thread
// 3 = worker exiting; the complete event is still signalled so the waiter is released
242 status->m_status = 3;
243 printf("Thread with taskId %i with handle %p exiting\n", status->m_taskId, status->m_threadHandle);
244 SetEvent(status->m_eventCompleteHandle);
245 break;
246 }
247 }
248 printf("Thread TERMINATED\n");
249 return 0;
250}
251
// Hands userData to the worker selected by threadIndex and wakes it.
// threadIndex must be non-negative (asserted); userData becomes the worker's
// m_userPtr, so a null userData would be read by the worker as "exit".
// NOTE(review): the lines that look up `threadStatus` and any further
// bookkeeping are not present in this listing.
252void btThreadSupportWin32::runTask(int threadIndex, void* userData)
253{
255 btAssert(threadIndex >= 0);
257
258 threadStatus.m_commandId = 1;
// 1 = task submitted
259 threadStatus.m_status = 1;
260 threadStatus.m_userPtr = userData;
262
// wake the worker waiting on its start event
264 SetEvent(threadStatus.m_eventStartHandle);
265}
266
// Waits for one worker to finish and returns its index.
// The finished worker's status is reset to 0 and its bit is cleared from
// m_startedThreadMask.
// NOTE(review): the wait call that produces `res` and the lookup of
// `threadStatus` are not present in this listing; `res - WAIT_OBJECT_0`
// presumably converts a multi-object wait result into a handle index - confirm.
267int btThreadSupportWin32::waitForResponse()
268{
270
271 int last = -1;
274 last = res - WAIT_OBJECT_0;
275
277 btAssert(threadStatus.m_threadHandle);
278 btAssert(threadStatus.m_eventCompleteHandle);
279
280 //WaitForSingleObject(threadStatus.m_eventCompleteHandle, INFINITE);
// the worker must have reported "finished" (2) or "exiting" (3)
281 btAssert(threadStatus.m_status > 1);
// back to idle
282 threadStatus.m_status = 0;
283
285 btAssert(last >= 0);
// mark this worker as no longer running a task
286 m_startedThreadMask &= ~(DWORD_PTR(1) << last);
287
288 return last;
289}
290
// Loops until m_startedThreadMask is zero, i.e. until no worker is still
// marked as started.
// NOTE(review): the loop body is not present in this listing; presumably it
// calls waitForResponse(), which clears one bit of the mask per call - confirm.
291void btThreadSupportWin32::waitForAllTasks()
292{
293 while (m_startedThreadMask)
294 {
296 }
297}
298
// Creates the worker threads and their start/complete events.
// The worker count is the logical processor count capped at
// BT_MAX_THREAD_COUNT, minus one for the already-running main thread.
// NOTE(review): many statements are not present in this listing (processor
// query, container sizing, the thread-creation call that yields `handle`,
// the definitions of `threadStatus`, `teamMask` and `mask`, and the affinity
// calls); the comments below describe only the visible statements.
299void btThreadSupportWin32::startThreads(const ConstructionInfo& threadConstructionInfo)
300{
// bumped on every call so event names differ between successive calls in this process
301 static int uniqueId = 0;
302 uniqueId++;
308 {
310 }
312 m_numThreads = btMin(procInfo.numLogicalProcessors, int(BT_MAX_THREAD_COUNT)) - 1; // cap to max thread count (-1 because main thread already exists)
313
317
318 // set main thread affinity
320 {
323 }
324
325 for (int i = 0; i < m_numThreads; i++)
326 {
327 printf("starting thread %d\n", i);
328
330
332 SIZE_T dwStackSize = threadConstructionInfo.m_threadStackSize;
337
// null user pointer until runTask supplies one
338 threadStatus.m_userPtr = 0;
339
// event names: "es"/"ec" prefix + first 8 chars of m_uniqueName + uniqueId + worker index
// NOTE(review): these are named events; CreateEventA opens an existing event when the
// name is already in use - confirm name collisions with other processes are acceptable
340 sprintf(threadStatus.m_eventStartHandleName, "es%.8s%d%d", threadConstructionInfo.m_uniqueName, uniqueId, i);
341 threadStatus.m_eventStartHandle = CreateEventA(0, false, false, threadStatus.m_eventStartHandleName);
342
343 sprintf(threadStatus.m_eventCompleteHandleName, "ec%.8s%d%d", threadConstructionInfo.m_uniqueName, uniqueId, i);
344 threadStatus.m_eventCompleteHandle = CreateEventA(0, false, false, threadStatus.m_eventCompleteHandleName);
345
// keep the completion handle in a separate array indexed by worker
346 m_completeHandles[i] = threadStatus.m_eventCompleteHandle;
347
349 //SetThreadPriority( handle, THREAD_PRIORITY_HIGHEST );
350 // highest priority -- can cause erratic performance when numThreads > numCores
351 // we don't want worker threads to be higher priority than the main thread or the main thread could get
352 // totally shut out and unable to tell the workers to stop
353 //SetThreadPriority( handle, THREAD_PRIORITY_BELOW_NORMAL );
354
355 {
356 int processorId = i + 1; // leave processor 0 for main thread
358 if (teamMask)
359 {
360 // bind each thread to only execute on processors of its assigned team
361 // - for single-socket Intel x86 CPUs this has no effect (only a single, shared L3 cache so there is only 1 team)
362 // - for multi-socket Intel this will keep threads from migrating from one socket to another
363 // - for AMD Ryzen this will keep threads from migrating from one CCX to another
365 if (mask)
366 {
368 }
369 }
371 }
372
// initial state: idle, no command, bound to the user's thread function
373 threadStatus.m_taskId = i;
374 threadStatus.m_commandId = 0;
375 threadStatus.m_status = 0;
376 threadStatus.m_threadHandle = handle;
377 threadStatus.m_userThreadFunc = threadConstructionInfo.m_userThreadFunc;
378
379 printf("started %s thread %d with threadHandle %p\n", threadConstructionInfo.m_uniqueName, i, handle);
380 }
381}
382
// Shuts down every worker: waits for any task still in flight, then wakes the
// worker with a null user pointer (its exit signal), waits for the worker to
// acknowledge, and closes the worker's three handles. Finally empties both
// bookkeeping arrays.
// NOTE(review): the line that defines `threadStatus` for index i is not
// present in this listing.
384void btThreadSupportWin32::stopThreads()
385{
386 for (int i = 0; i < m_activeThreadStatus.size(); i++)
387 {
// non-zero status means the worker has not yet been returned to idle; wait for its completion signal first
389 if (threadStatus.m_status > 0)
390 {
391 WaitForSingleObject(threadStatus.m_eventCompleteHandle, INFINITE);
392 }
393
// a null user pointer tells the worker loop to exit
394 threadStatus.m_userPtr = NULL;
395 SetEvent(threadStatus.m_eventStartHandle);
396 WaitForSingleObject(threadStatus.m_eventCompleteHandle, INFINITE);
397
// NOTE(review): the wait above is on the completion event, not on the thread handle,
// so the worker may still be executing its final statements when the handles are closed
398 CloseHandle(threadStatus.m_eventCompleteHandle);
399 CloseHandle(threadStatus.m_eventStartHandle);
400 CloseHandle(threadStatus.m_threadHandle);
401 }
402
403 m_activeThreadStatus.clear();
404 m_completeHandles.clear();
405}
406
// Class with a private section and public lock()/unlock() members.
// NOTE(review): the class header, the private member, two further public
// members and the bodies of lock()/unlock() are not present in this listing;
// presumably this is the btWin32CriticalSection type allocated by
// createCriticalSection() - confirm against the full source.
408{
409private:
411
412public:
414 {
416 }
417
419 {
421 }
422
423 void lock()
424 {
426 }
427
428 void unlock()
429 {
431 }
432};
433
// Allocates 16-byte-aligned storage sized for a btWin32CriticalSection via
// btAlignedAlloc and returns the resulting critical section.
// NOTE(review): the line that creates `cs` from `mem` is not present in this
// listing; no check of the allocation result is visible.
434btCriticalSection* btThreadSupportWin32::createCriticalSection()
435{
436 unsigned char* mem = (unsigned char*)btAlignedAlloc(sizeof(btWin32CriticalSection), 16);
438 return cs;
439}
440
// Counterpart of createCriticalSection(): takes the critical section to dispose of.
// NOTE(review): the body statements are not present in this listing.
441void btThreadSupportWin32::deleteCriticalSection(btCriticalSection* criticalSection)
442{
445}
446
// Factory: returns a heap-allocated btThreadSupportWin32 built from `info`.
// The object is created with plain `new`; no matching release is visible here.
// NOTE(review): the function signature is not present in this listing.
448{
449 return new btThreadSupportWin32(info);
450}
451
452#endif //defined(_WIN32) && BT_THREADSAFE
#define btAlignedFree(ptr)
#define btAlignedAlloc(size, alignment)
const T & btMax(const T &a, const T &b)
Definition btMinMax.h:27
const T & btMin(const T &a, const T &b)
Definition btMinMax.h:21
static int uniqueId
#define btAssert(x)
Definition btScalar.h:153
#define BT_OVERRIDE
Definition btThreads.h:26
const unsigned int BT_MAX_THREAD_COUNT
Definition btThreads.h:31
The btAlignedObjectArray template class uses a subset of the std::vector interface for its methods. It...
virtual int getCacheFriendlyNumThreads() const =0
virtual int getLogicalToPhysicalCoreRatio() const =0
virtual void waitForAllTasks()=0
static btThreadSupportInterface * create(const ConstructionInfo &info)
virtual void runTask(int threadIndex, void *userData)=0
virtual int getNumWorkerThreads() const =0