diff --git a/src/CZeroSuppression.cpp b/src/CZeroSuppression.cpp index 70c6b3998d1b6f387cbff56f14647873f2fb9cde..d9b6ac54e1a6a16ad1d404237a6b5ffd84012840 100644 --- a/src/CZeroSuppression.cpp +++ b/src/CZeroSuppression.cpp @@ -22,6 +22,8 @@ // Number of left shifts equivalent to the number of samples in a seeData // element (sizeof(__m128i) / sizeof(int16_t)) #define SSE_DATA_SHIFT 3 +// Index of last element in a SSE buffer (16 samples) +#define SSE_UPPER_INDEX 15 #endif namespace ntof { @@ -277,11 +279,13 @@ boost::optional CZeroSuppression::PerformZeroSuppression( const __m128i sseThr = _mm_set1_epi16(static_cast(thresholdCode_)); + // Variable used with a mask to detect the relative position of the current + // crossing samples within a SSE buffer (16 samples) + const __m128i relPos = _mm_set_epi8(0x10, 0x0F, 0x0E, 0x0D, 0x0C, 0x0B, + 0x0A, 0x09, 0x08, 0x07, 0x06, 0x05, + 0x04, 0x03, 0x02, 0x01); + uint64_t sseDataIndex = 0; - uint64_t lastSamplePos = dataIndex; // Position of the last sample examined - // in data_ - uint64_t currSamplePos = 0; // Position of the current sample examined in - // data_ #endif while (true) @@ -311,31 +315,44 @@ boost::optional CZeroSuppression::PerformZeroSuppression( __m128i cmp1 = _mm_cmplt_epi16(sseData[sseDataIndex + 1], sseNegPulseThr); - __m128i pack = _mm_packs_epi16(cmp0, cmp1); - - int sseResult = _mm_movemask_epi8(pack); + // Mask of the 16 samples crossing the threshold + __m128i crossSampleMask = _mm_packs_epi16(cmp0, cmp1); - while (sseResult != 0) + // If there is at least one crossing sample... 
+ if (_mm_movemask_epi8(crossSampleMask)) { - currSamplePos = (sseDataIndex << SSE_DATA_SHIFT) + - __builtin_ffs(sseResult); - if (currSamplePos > lastSamplePos) + // Calculate the corresponding position within the SSE buffer + __m128i crossSamplePos = _mm_and_si128(relPos, crossSampleMask); + + // Determine the current first element position of the SSE-sample + // buffer in data_ + uint64_t firstSamplePos = sseDataIndex << SSE_DATA_SHIFT; + + // Check if it is possible to start searching for samples crossing + // the threshold in the SSE buffer + if ((firstSamplePos + SSE_UPPER_INDEX) > dataIndex) { - dataIndex = currSamplePos - 1; - lastSamplePos = dataIndex; // update last position - LOG_TRACE << "negative pulse found at sse data index: " + - boost::lexical_cast(sseDataIndex); - LOG_TRACE << "data index = " + - boost::lexical_cast(dataIndex); - found = true; - break; + // Determine the relative position of dataIndex within the + // SSE buffer + __m128i dataIndexPos = (dataIndex > firstSamplePos) ? 
+ _mm_set1_epi8(static_cast(dataIndex - firstSamplePos)) : + _mm_set1_epi8(static_cast(0)); + + // Get the mask of crossing samples at or after dataIndex + int sseResult = _mm_movemask_epi8( + _mm_cmpgt_epi8(crossSamplePos, dataIndexPos)); + + if (sseResult != 0) + { + dataIndex = firstSamplePos + __builtin_ffs(sseResult) - 1; + LOG_TRACE << "negative pulse found at sse data index: " + + boost::lexical_cast(sseDataIndex); + LOG_TRACE << "data index = " + + boost::lexical_cast(dataIndex); + break; + } } - - sseResult &= ~(1 << (__builtin_ffs(sseResult) - 1)); } - if (found) - break; - sseDataIndex += 2; #endif } @@ -361,31 +378,44 @@ boost::optional CZeroSuppression::PerformZeroSuppression( __m128i cmp1 = _mm_cmpgt_epi16(sseData[sseDataIndex + 1], ssePosPulseThr); - __m128i pack = _mm_packs_epi16(cmp0, cmp1); + // Mask of the 16 samples crossing the threshold + __m128i crossSampleMask = _mm_packs_epi16(cmp0, cmp1); - int sseResult = _mm_movemask_epi8(pack); - - while (sseResult != 0) + // If there is at least one crossing sample... 
+ if (_mm_movemask_epi8(crossSampleMask)) { - currSamplePos = (sseDataIndex << SSE_DATA_SHIFT) + - __builtin_ffs(sseResult); - if (currSamplePos > lastSamplePos) + // Calculate the corresponding position within the SSE buffer + __m128i crossSamplePos = _mm_and_si128(relPos, crossSampleMask); + + // Determine the current first element position of the SSE-sample + // buffer in data_ + uint64_t firstSamplePos = sseDataIndex << SSE_DATA_SHIFT; + + // Check if it is possible to start searching for samples crossing + // the threshold in the SSE buffer + if ((firstSamplePos + SSE_UPPER_INDEX) > dataIndex) { - dataIndex = currSamplePos - 1; - lastSamplePos = dataIndex; // update last position - LOG_TRACE << "positive pulse found at sse data index: " + - boost::lexical_cast(sseDataIndex); - LOG_TRACE << "data index = " + - boost::lexical_cast(dataIndex); - found = true; - break; + // Determine the relative position of dataIndex within the + // SSE buffer + __m128i dataIndexPos = (dataIndex > firstSamplePos) ? 
+ _mm_set1_epi8(static_cast(dataIndex - firstSamplePos)) : + _mm_set1_epi8(static_cast(0)); + + // Get the mask of crossing samples at or after dataIndex + int sseResult = _mm_movemask_epi8( + _mm_cmpgt_epi8(crossSamplePos, dataIndexPos)); + + if (sseResult != 0) + { + dataIndex = firstSamplePos + __builtin_ffs(sseResult) - 1; + LOG_TRACE << "positive pulse found at sse data index: " + + boost::lexical_cast(sseDataIndex); + LOG_TRACE << "data index = " + + boost::lexical_cast(dataIndex); + break; + } } - - sseResult &= ~(1 << (__builtin_ffs(sseResult) - 1)); } - if (found) - break; - sseDataIndex += 2; #endif } @@ -426,31 +456,44 @@ boost::optional CZeroSuppression::PerformZeroSuppression( __m128i cmp1 = _mm_cmpgt_epi16(sseData[sseDataIndex + 1], sseThr); - __m128i pack = _mm_packs_epi16(cmp0, cmp1); + // Mask of the 16 samples crossing the threshold + __m128i crossSampleMask = _mm_packs_epi16(cmp0, cmp1); - int sseResult = _mm_movemask_epi8(pack); - - while (sseResult != 0) + // If there is at least one crossing sample... 
+ if (_mm_movemask_epi8(crossSampleMask)) { - currSamplePos = (sseDataIndex << SSE_DATA_SHIFT) + - __builtin_ffs(sseResult); - if (currSamplePos > lastSamplePos) + // Calculate the corresponding position within the SSE buffer + __m128i crossSamplePos = _mm_and_si128(relPos, crossSampleMask); + + // Determine the current first element position of the SSE-sample + // buffer in data_ + uint64_t firstSamplePos = sseDataIndex << SSE_DATA_SHIFT; + + // Check if it is possible to start searching for samples crossing + // the threshold in the SSE buffer + if ((firstSamplePos + SSE_UPPER_INDEX) > dataIndex) { - dataIndex = currSamplePos - 1; - lastSamplePos = dataIndex; // update last position - LOG_TRACE << "negative pulse ending at sse data index: " + - boost::lexical_cast(sseDataIndex); - LOG_TRACE << "data index = " + - boost::lexical_cast(dataIndex); - found = true; - break; + // Determine the relative position of dataIndex within the + // SSE buffer + __m128i dataIndexPos = (dataIndex > firstSamplePos) ? 
+ _mm_set1_epi8(static_cast(dataIndex - firstSamplePos)) : + _mm_set1_epi8(static_cast(0)); + + // Get the mask of crossing samples at or after dataIndex + int sseResult = _mm_movemask_epi8( + _mm_cmpgt_epi8(crossSamplePos, dataIndexPos)); + + if (sseResult != 0) + { + dataIndex = firstSamplePos + __builtin_ffs(sseResult) - 1; + LOG_TRACE << "negative pulse ending at sse data index: " + + boost::lexical_cast(sseDataIndex); + LOG_TRACE << "data index = " + + boost::lexical_cast(dataIndex); + break; + } } - - sseResult &= ~(1 << (__builtin_ffs(sseResult) - 1)); } - if (found) - break; - sseDataIndex += 2; #endif } @@ -468,31 +511,44 @@ boost::optional CZeroSuppression::PerformZeroSuppression( __m128i cmp1 = _mm_cmplt_epi16(sseData[sseDataIndex + 1], sseThr); - __m128i pack = _mm_packs_epi16(cmp0, cmp1); + // Mask of the 16 samples crossing the threshold + __m128i crossSampleMask = _mm_packs_epi16(cmp0, cmp1); - int sseResult = _mm_movemask_epi8(pack); - - while (sseResult != 0) + // If there is at least one crossing sample... 
+ if (_mm_movemask_epi8(crossSampleMask)) { - currSamplePos = (sseDataIndex << SSE_DATA_SHIFT) + - __builtin_ffs(sseResult); - if (currSamplePos > lastSamplePos) + // Calculate the corresponding position within the SSE buffer + __m128i crossSamplePos = _mm_and_si128(relPos, crossSampleMask); + + // Determine the current first element position of the SSE-sample + // buffer in data_ + uint64_t firstSamplePos = sseDataIndex << SSE_DATA_SHIFT; + + // Check if it is possible to start searching for samples crossing + // the threshold in the SSE buffer + if ((firstSamplePos + SSE_UPPER_INDEX) > dataIndex) { - dataIndex = currSamplePos - 1; - lastSamplePos = dataIndex; // update last position - LOG_TRACE << "positive pulse ending at sse data index: " + - boost::lexical_cast(sseDataIndex); - LOG_TRACE << "data index = " + - boost::lexical_cast(dataIndex); - found = true; - break; + // Determine the relative position of dataIndex within the + // SSE buffer + __m128i dataIndexPos = (dataIndex > firstSamplePos) ? + _mm_set1_epi8(static_cast(dataIndex - firstSamplePos)) : + _mm_set1_epi8(static_cast(0)); + + // Get the mask of crossing samples at or after dataIndex + int sseResult = _mm_movemask_epi8( + _mm_cmpgt_epi8(crossSamplePos, dataIndexPos)); + + if (sseResult != 0) + { + dataIndex = firstSamplePos + __builtin_ffs(sseResult) - 1; + LOG_TRACE << "positive pulse ending at sse data index: " + + boost::lexical_cast(sseDataIndex); + LOG_TRACE << "data index = " + + boost::lexical_cast(dataIndex); + break; + } } - - sseResult &= ~(1 << (__builtin_ffs(sseResult) - 1)); } - if (found) - break; - sseDataIndex += 2; #endif }