parallel_kernels.h File Reference

Go to the source code of this file.

Defines

#define B_ID(index, i)   (index+(bStride*(i)))
#define C_ID(index, i)   (index+(cStride*(i)))

Functions

template<typename T >
dxGlobal void cudaComputeAdcfmBT (int4 *bodyIDs, typename vec4< T >::Type *j0, typename vec4< T >::Type *j1, typename vec4< T >::Type *j2, typename vec4< T >::Type *j3, typename vec4< T >::Type *ij0, typename vec4< T >::Type *ij1, typename vec4< T >::Type *ij2, typename vec4< T >::Type *ij3, T *adcfm, T *rhs, T sorParam, int numConstraints)
template<typename T >
dxGlobal void cudaComputeInvMJTT (int4 *bodyIDs, typename vec4< T >::Type *j0, typename vec4< T >::Type *j1, typename vec4< T >::Type *j2, typename vec4< T >::Type *j3, typename vec4< T >::Type *ij0, typename vec4< T >::Type *ij1, typename vec4< T >::Type *ij2, typename vec4< T >::Type *ij3, T *iMass, int numConstraints, typename vec4< T >::Type *ii0, typename vec4< T >::Type *ii1, typename vec4< T >::Type *ii2)
template<typename T >
dxGlobal void cudaIntegrateT (typename vec4< T >::Type *pos, typename vec4< T >::Type *lVel, typename vec4< T >::Type *aVel, float deltaTime, int numConstraints)
template<typename T >
dxGlobal void cudaReduceIterativeCompactT (typename vec4< T >::Type *fc0_reduction, typename vec4< T >::Type *fc1_reduction, const int treePower)
template<typename T >
dxGlobal void cudaReduceLoopedCompactT (typename vec4< T >::Type *fc0_reduction, typename vec4< T >::Type *fc1_reduction, const int size, const int step)
template<typename T , unsigned int blockSize>
dxGlobal void cudaReduceSequentialT (typename vec4< T >::Type *fc0, typename vec4< T >::Type *fc1, typename vec4< T >::Type *fc0_reduction, typename vec4< T >::Type *fc1_reduction, int n)
template<typename T >
dxGlobal void cudaReduceStridedT (typename vec4< T >::Type *fc0, typename vec4< T >::Type *fc1, const typename vec4< T >::Type *fc0_reduction, const typename vec4< T >::Type *fc1_reduction, const int reductionStride, const int bodySize, const int reductionSize)
template<typename T >
dxGlobal void cudaSORLCPT (typename vec4< T >::Type *fc0_reduction, typename vec4< T >::Type *fc1_reduction, T *lambda, const int4 *bodyIDs, const int *fIDs, const typename vec4< T >::Type *j, const typename vec4< T >::Type *ij, const typename vec4< T >::Type *fc0, const typename vec4< T >::Type *fc1, const T *adcfm, const T *rhs, const T *hilo, const int offset, const int numConstraints, const int bStride, const int cStride)
template<typename T >
dxGlobal void cudaZeroT (T *buffer, const int bufferSize)
template<typename T >
static dxDevice T parallel_inf ()
template<>
dxDevice double parallel_inf< double > ()
template<>
dxDevice float parallel_inf< float > ()
template<typename T >
static dxDevice T parallel_zero ()
template<>
dxDevice double parallel_zero< double > ()
template<>
dxDevice double4 parallel_zero< double4 > ()
template<>
dxDevice float parallel_zero< float > ()
template<>
dxDevice float4 parallel_zero< float4 > ()

Variables

static uint2 s_blockDim
static uint2 s_blockIdx
static uint2 s_threadIdx

Define Documentation

#define B_ID ( index,
i )     (index+(bStride*(i)))

Definition at line 5 of file parallel_kernels.h.

#define C_ID ( index,
i )     (index+(cStride*(i)))

Definition at line 4 of file parallel_kernels.h.


Function Documentation

template<typename T >
dxGlobal void cudaComputeAdcfmBT ( int4 *  bodyIDs,
typename vec4< T >::Type *  j0,
typename vec4< T >::Type *  j1,
typename vec4< T >::Type *  j2,
typename vec4< T >::Type *  j3,
typename vec4< T >::Type *  ij0,
typename vec4< T >::Type *  ij1,
typename vec4< T >::Type *  ij2,
typename vec4< T >::Type *  ij3,
T *  adcfm,
T *  rhs,
T  sorParam,
int  numConstraints 
) [inline]

Definition at line 399 of file parallel_kernels.h.

template<typename T >
dxGlobal void cudaComputeInvMJTT ( int4 *  bodyIDs,
typename vec4< T >::Type *  j0,
typename vec4< T >::Type *  j1,
typename vec4< T >::Type *  j2,
typename vec4< T >::Type *  j3,
typename vec4< T >::Type *  ij0,
typename vec4< T >::Type *  ij1,
typename vec4< T >::Type *  ij2,
typename vec4< T >::Type *  ij3,
T *  iMass,
int  numConstraints,
typename vec4< T >::Type *  ii0,
typename vec4< T >::Type *  ii1,
typename vec4< T >::Type *  ii2 
) [inline]

Definition at line 321 of file parallel_kernels.h.

template<typename T >
dxGlobal void cudaIntegrateT ( typename vec4< T >::Type *  pos,
typename vec4< T >::Type *  lVel,
typename vec4< T >::Type *  aVel,
float  deltaTime,
int  numConstraints 
) [inline]

Definition at line 457 of file parallel_kernels.h.

template<typename T >
dxGlobal void cudaReduceIterativeCompactT ( typename vec4< T >::Type *  fc0_reduction,
typename vec4< T >::Type *  fc1_reduction,
const int  treePower 
) [inline]

Definition at line 157 of file parallel_kernels.h.

template<typename T >
dxGlobal void cudaReduceLoopedCompactT ( typename vec4< T >::Type *  fc0_reduction,
typename vec4< T >::Type *  fc1_reduction,
const int  size,
const int  step 
) [inline]

Definition at line 180 of file parallel_kernels.h.

template<typename T , unsigned int blockSize>
dxGlobal void cudaReduceSequentialT ( typename vec4< T >::Type *  fc0,
typename vec4< T >::Type *  fc1,
typename vec4< T >::Type *  fc0_reduction,
typename vec4< T >::Type *  fc1_reduction,
int  n 
) [inline]

Definition at line 240 of file parallel_kernels.h.

template<typename T >
dxGlobal void cudaReduceStridedT ( typename vec4< T >::Type *  fc0,
typename vec4< T >::Type *  fc1,
const typename vec4< T >::Type *  fc0_reduction,
const typename vec4< T >::Type *  fc1_reduction,
const int  reductionStride,
const int  bodySize,
const int  reductionSize 
) [inline]

Definition at line 209 of file parallel_kernels.h.

template<typename T >
dxGlobal void cudaSORLCPT ( typename vec4< T >::Type *  fc0_reduction,
typename vec4< T >::Type *  fc1_reduction,
T *  lambda,
const int4 *  bodyIDs,
const int *  fIDs,
const typename vec4< T >::Type *  j,
const typename vec4< T >::Type *  ij,
const typename vec4< T >::Type *  fc0,
const typename vec4< T >::Type *  fc1,
const T *  adcfm,
const T *  rhs,
const T *  hilo,
const int  offset,
const int  numConstraints,
const int  bStride,
const int  cStride 
) [inline]

Definition at line 38 of file parallel_kernels.h.

template<typename T >
dxGlobal void cudaZeroT ( T *  buffer,
const int  bufferSize 
) [inline]

Definition at line 27 of file parallel_kernels.h.

template<typename T >
static dxDevice T parallel_inf (  )  [inline, static]

Definition at line 9 of file parallel_kernels.h.

template<>
dxDevice double parallel_inf< double > (  )  [inline]
template<>
dxDevice float parallel_inf< float > (  )  [inline]
template<typename T >
static dxDevice T parallel_zero (  )  [inline, static]

Definition at line 10 of file parallel_kernels.h.

template<>
dxDevice double parallel_zero< double > (  )  [inline]
template<>
dxDevice double4 parallel_zero< double4 > (  )  [inline]
template<>
dxDevice float parallel_zero< float > (  )  [inline]
template<>
dxDevice float4 parallel_zero< float4 > (  )  [inline]

Variable Documentation

uint2 s_blockDim [static]

Definition at line 7 of file parallel_kernels.h.

uint2 s_blockIdx [static]

Definition at line 7 of file parallel_kernels.h.

uint2 s_threadIdx [static]

Definition at line 7 of file parallel_kernels.h.

 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Defines


parallel_quickstep
Author(s): Jared Duke
autogenerated on Fri Jan 11 09:59:40 2013