From 9a1896cba7479892fa92355405478f39f1ea9074 Mon Sep 17 00:00:00 2001 From: gdeblasi Date: Mon, 10 Aug 2020 19:58:38 +0200 Subject: [PATCH 1/2] Replace sequential search of crossing samples in the SSE buffer with a parallel one --- src/CZeroSuppression.cpp | 220 ++++++++++++++++++++++++--------------- 1 file changed, 136 insertions(+), 84 deletions(-) diff --git a/src/CZeroSuppression.cpp b/src/CZeroSuppression.cpp index 70c6b39..0c9c496 100644 --- a/src/CZeroSuppression.cpp +++ b/src/CZeroSuppression.cpp @@ -22,6 +22,8 @@ // Number of left shifts equivalent to the number of samples in a seeData // element (sizeof(__m128i) / sizeof(int16_t)) #define SSE_DATA_SHIFT 3 +// Index of last element in a SSE-sample buffer (16 samples) +#define SSE_UPPER_INDEX 15 #endif namespace ntof { @@ -277,11 +279,13 @@ boost::optional CZeroSuppression::PerformZeroSuppression( const __m128i sseThr = _mm_set1_epi16(static_cast(thresholdCode_)); + // Variable used with a mask to detect the relative position of the current + // crossing samples within a SSE-sample buffer (16 samples) + const __m128i relPos = _mm_set_epi8(0x10, 0x0F, 0x0E, 0x0D, 0x0C, 0x0B, + 0x0A, 0x09, 0x08, 0x07, 0x06, 0x05, + 0x04, 0x03, 0x02, 0x01); + uint64_t sseDataIndex = 0; - uint64_t lastSamplePos = dataIndex; // Position of the last sample examined - // in data_ - uint64_t currSamplePos = 0; // Position of the current sample examined in - // data_ #endif while (true) @@ -311,31 +315,43 @@ boost::optional CZeroSuppression::PerformZeroSuppression( __m128i cmp1 = _mm_cmplt_epi16(sseData[sseDataIndex + 1], sseNegPulseThr); - __m128i pack = _mm_packs_epi16(cmp0, cmp1); - - int sseResult = _mm_movemask_epi8(pack); + // Mask of the 16 samples crossing the threshold + __m128i crossSampleMask = _mm_packs_epi16(cmp0, cmp1); - while (sseResult != 0) + // If there is at least one crossing sample... + if (_mm_movemask_epi8(crossSampleMask)) { - currSamplePos = (sseDataIndex << SSE_DATA_SHIFT) + - __builtin_ffs(sseResult); - if (currSamplePos > lastSamplePos) + // Calculate the corresponding position within the SSE-sample buffer + __m128i crossSamplePos = _mm_and_si128(relPos, crossSampleMask); + + // Determine the current first element position of the SSE-sample + // buffer in data_ + uint64_t firstSamplePos = sseDataIndex << SSE_DATA_SHIFT; + + // If the last element position of the SSE-sample buffer is greater + // than the one of the last data_ sample examined (dataIndex)... + if ((firstSamplePos + SSE_UPPER_INDEX) > dataIndex) { - dataIndex = currSamplePos - 1; - lastSamplePos = dataIndex; // update last position - LOG_TRACE << "negative pulse found at sse data index: " + - boost::lexical_cast(sseDataIndex); - LOG_TRACE << "data index = " + - boost::lexical_cast(dataIndex); - found = true; - break; + // Determine the relative position of dataIndex within the + // SSE-sample buffer + __m128i dataIndexPos = (dataIndex > firstSamplePos) ? + _mm_set1_epi8(static_cast(dataIndex - firstSamplePos)) : + _mm_set1_epi8(static_cast(0)); + + int sseResult = _mm_movemask_epi8( + _mm_cmpgt_epi8(crossSamplePos, dataIndexPos)); + + if (sseResult != 0) + { + dataIndex = firstSamplePos + __builtin_ffs(sseResult) - 1; + LOG_TRACE << "negative pulse found at sse data index: " + + boost::lexical_cast(sseDataIndex); + LOG_TRACE << "data index = " + + boost::lexical_cast(dataIndex); + break; + } } - - sseResult &= ~(1 << (__builtin_ffs(sseResult) - 1)); } - if (found) - break; - sseDataIndex += 2; #endif } @@ -361,31 +377,43 @@ boost::optional CZeroSuppression::PerformZeroSuppression( __m128i cmp1 = _mm_cmpgt_epi16(sseData[sseDataIndex + 1], ssePosPulseThr); - __m128i pack = _mm_packs_epi16(cmp0, cmp1); + // Mask of the 16 samples crossing the threshold + __m128i crossSampleMask = _mm_packs_epi16(cmp0, cmp1); - int sseResult = _mm_movemask_epi8(pack); - - while (sseResult != 0) + // If there is at least one crossing sample... + if (_mm_movemask_epi8(crossSampleMask)) { - currSamplePos = (sseDataIndex << SSE_DATA_SHIFT) + - __builtin_ffs(sseResult); - if (currSamplePos > lastSamplePos) + // Calculate the corresponding position within the SSE-sample buffer + __m128i crossSamplePos = _mm_and_si128(relPos, crossSampleMask); + + // Determine the current first element position of the SSE-sample + // buffer in data_ + uint64_t firstSamplePos = sseDataIndex << SSE_DATA_SHIFT; + + // If the last element position of the SSE-sample buffer is greater + // than the one of the last data_ sample examined (dataIndex)... + if ((firstSamplePos + SSE_UPPER_INDEX) > dataIndex) { - dataIndex = currSamplePos - 1; - lastSamplePos = dataIndex; // update last position - LOG_TRACE << "positive pulse found at sse data index: " + - boost::lexical_cast(sseDataIndex); - LOG_TRACE << "data index = " + - boost::lexical_cast(dataIndex); - found = true; - break; + // Determine the relative position of dataIndex within the + // SSE-sample buffer + __m128i dataIndexPos = (dataIndex > firstSamplePos) ? + _mm_set1_epi8(static_cast(dataIndex - firstSamplePos)) : + _mm_set1_epi8(static_cast(0)); + + int sseResult = _mm_movemask_epi8( + _mm_cmpgt_epi8(crossSamplePos, dataIndexPos)); + + if (sseResult != 0) + { + dataIndex = firstSamplePos + __builtin_ffs(sseResult) - 1; + LOG_TRACE << "positive pulse found at sse data index: " + + boost::lexical_cast(sseDataIndex); + LOG_TRACE << "data index = " + + boost::lexical_cast(dataIndex); + break; + } } - - sseResult &= ~(1 << (__builtin_ffs(sseResult) - 1)); } - if (found) - break; - sseDataIndex += 2; #endif } @@ -426,31 +454,43 @@ boost::optional CZeroSuppression::PerformZeroSuppression( __m128i cmp1 = _mm_cmpgt_epi16(sseData[sseDataIndex + 1], sseThr); - __m128i pack = _mm_packs_epi16(cmp0, cmp1); + // Mask of the 16 samples crossing the threshold + __m128i crossSampleMask = _mm_packs_epi16(cmp0, cmp1); - int sseResult = _mm_movemask_epi8(pack); - - while (sseResult != 0) + // If there is at least one crossing sample... + if (_mm_movemask_epi8(crossSampleMask)) { - currSamplePos = (sseDataIndex << SSE_DATA_SHIFT) + - __builtin_ffs(sseResult); - if (currSamplePos > lastSamplePos) + // Calculate the corresponding position within the SSE-sample buffer + __m128i crossSamplePos = _mm_and_si128(relPos, crossSampleMask); + + // Determine the current first element position of the SSE-sample + // buffer in data_ + uint64_t firstSamplePos = sseDataIndex << SSE_DATA_SHIFT; + + // If the last element position of the SSE-sample buffer is greater + // than the one of the last data_ sample examined (dataIndex)... + if ((firstSamplePos + SSE_UPPER_INDEX) > dataIndex) { - dataIndex = currSamplePos - 1; - lastSamplePos = dataIndex; // update last position - LOG_TRACE << "negative pulse ending at sse data index: " + - boost::lexical_cast(sseDataIndex); - LOG_TRACE << "data index = " + - boost::lexical_cast(dataIndex); - found = true; - break; + // Determine the relative position of dataIndex within the + // SSE-sample buffer + __m128i dataIndexPos = (dataIndex > firstSamplePos) ? + _mm_set1_epi8(static_cast(dataIndex - firstSamplePos)) : + _mm_set1_epi8(static_cast(0)); + + int sseResult = _mm_movemask_epi8( + _mm_cmpgt_epi8(crossSamplePos, dataIndexPos)); + + if (sseResult != 0) + { + dataIndex = firstSamplePos + __builtin_ffs(sseResult) - 1; + LOG_TRACE << "negative pulse ending at sse data index: " + + boost::lexical_cast(sseDataIndex); + LOG_TRACE << "data index = " + + boost::lexical_cast(dataIndex); + break; + } } - - sseResult &= ~(1 << (__builtin_ffs(sseResult) - 1)); } - if (found) - break; - sseDataIndex += 2; #endif } @@ -468,31 +508,43 @@ boost::optional CZeroSuppression::PerformZeroSuppression( __m128i cmp1 = _mm_cmplt_epi16(sseData[sseDataIndex + 1], sseThr); - __m128i pack = _mm_packs_epi16(cmp0, cmp1); + // Mask of the 16 samples crossing the threshold + __m128i crossSampleMask = _mm_packs_epi16(cmp0, cmp1); - int sseResult = _mm_movemask_epi8(pack); - - while (sseResult != 0) + // If there is at least one crossing sample... + if (_mm_movemask_epi8(crossSampleMask)) { - currSamplePos = (sseDataIndex << SSE_DATA_SHIFT) + - __builtin_ffs(sseResult); - if (currSamplePos > lastSamplePos) + // Calculate the corresponding position within the SSE-sample buffer + __m128i crossSamplePos = _mm_and_si128(relPos, crossSampleMask); + + // Determine the current first element position of the SSE-sample + // buffer in data_ + uint64_t firstSamplePos = sseDataIndex << SSE_DATA_SHIFT; + + // If the last element position of the SSE-sample buffer is greater + // than the one of the last data_ sample examined (dataIndex)... + if ((firstSamplePos + SSE_UPPER_INDEX) > dataIndex) { - dataIndex = currSamplePos - 1; - lastSamplePos = dataIndex; // update last position - LOG_TRACE << "positive pulse ending at sse data index: " + - boost::lexical_cast(sseDataIndex); - LOG_TRACE << "data index = " + - boost::lexical_cast(dataIndex); - found = true; - break; + // Determine the relative position of dataIndex within the + // SSE-sample buffer + __m128i dataIndexPos = (dataIndex > firstSamplePos) ? + _mm_set1_epi8(static_cast(dataIndex - firstSamplePos)) : + _mm_set1_epi8(static_cast(0)); + + int sseResult = _mm_movemask_epi8( + _mm_cmpgt_epi8(crossSamplePos, dataIndexPos)); + + if (sseResult != 0) + { + dataIndex = firstSamplePos + __builtin_ffs(sseResult) - 1; + LOG_TRACE << "positive pulse ending at sse data index: " + + boost::lexical_cast(sseDataIndex); + LOG_TRACE << "data index = " + + boost::lexical_cast(dataIndex); + break; + } } - - sseResult &= ~(1 << (__builtin_ffs(sseResult) - 1)); } - if (found) - break; - sseDataIndex += 2; #endif } -- GitLab From 5e951432bafe7f6d3017d30e8080ce9da10f74e2 Mon Sep 17 00:00:00 2001 From: gdeblasi Date: Mon, 10 Aug 2020 22:49:26 +0200 Subject: [PATCH 2/2] Replace sequential search of crossing samples in the SSE buffer with a parallel one --- src/CZeroSuppression.cpp | 40 ++++++++++++++++++++++------------------ 1 file changed, 22 insertions(+), 18 deletions(-) diff --git a/src/CZeroSuppression.cpp b/src/CZeroSuppression.cpp index 0c9c496..d9b6ac5 100644 --- a/src/CZeroSuppression.cpp +++ b/src/CZeroSuppression.cpp @@ -22,7 +22,7 @@ // Number of left shifts equivalent to the number of samples in a seeData // element (sizeof(__m128i) / sizeof(int16_t)) #define SSE_DATA_SHIFT 3 -// Index of last element in a SSE-sample buffer (16 samples) +// Index of last element in a SSE buffer (16 samples) #define SSE_UPPER_INDEX 15 #endif @@ -280,7 +280,7 @@ boost::optional CZeroSuppression::PerformZeroSuppression( const __m128i sseThr = _mm_set1_epi16(static_cast(thresholdCode_)); // Variable used with a mask to detect the relative position of the current - // crossing samples within a SSE-sample buffer (16 samples) + // crossing samples within a SSE buffer (16 samples) const __m128i relPos = _mm_set_epi8(0x10, 0x0F, 0x0E, 0x0D, 0x0C, 0x0B, 0x0A, 0x09, 0x08, 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01); @@ -321,23 +321,24 @@ boost::optional CZeroSuppression::PerformZeroSuppression( // If there is at least one crossing sample... if (_mm_movemask_epi8(crossSampleMask)) { - // Calculate the corresponding position within the SSE-sample buffer + // Calculate the corresponding position within the SSE buffer __m128i crossSamplePos = _mm_and_si128(relPos, crossSampleMask); // Determine the current first element position of the SSE-sample // buffer in data_ uint64_t firstSamplePos = sseDataIndex << SSE_DATA_SHIFT; - // If the last element position of the SSE-sample buffer is greater - // than the one of the last data_ sample examined (dataIndex)... + // Check if it is possible to start searching for samples crossing + // the threshold in the SSE buffer if ((firstSamplePos + SSE_UPPER_INDEX) > dataIndex) { // Determine the relative position of dataIndex within the - // SSE-sample buffer + // SSE buffer __m128i dataIndexPos = (dataIndex > firstSamplePos) ? _mm_set1_epi8(static_cast(dataIndex - firstSamplePos)) : _mm_set1_epi8(static_cast(0)); + // Get the mask of crossing samples after dataIndex int sseResult = _mm_movemask_epi8( _mm_cmpgt_epi8(crossSamplePos, dataIndexPos)); @@ -383,23 +384,24 @@ boost::optional CZeroSuppression::PerformZeroSuppression( // If there is at least one crossing sample... if (_mm_movemask_epi8(crossSampleMask)) { - // Calculate the corresponding position within the SSE-sample buffer + // Calculate the corresponding position within the SSE buffer __m128i crossSamplePos = _mm_and_si128(relPos, crossSampleMask); // Determine the current first element position of the SSE-sample // buffer in data_ uint64_t firstSamplePos = sseDataIndex << SSE_DATA_SHIFT; - // If the last element position of the SSE-sample buffer is greater - // than the one of the last data_ sample examined (dataIndex)... + // Check if it is possible to start searching for samples crossing + // the threshold in the SSE buffer if ((firstSamplePos + SSE_UPPER_INDEX) > dataIndex) { // Determine the relative position of dataIndex within the - // SSE-sample buffer + // SSE buffer __m128i dataIndexPos = (dataIndex > firstSamplePos) ? _mm_set1_epi8(static_cast(dataIndex - firstSamplePos)) : _mm_set1_epi8(static_cast(0)); + // Get the mask of crossing samples after dataIndex int sseResult = _mm_movemask_epi8( _mm_cmpgt_epi8(crossSamplePos, dataIndexPos)); @@ -460,23 +462,24 @@ boost::optional CZeroSuppression::PerformZeroSuppression( // If there is at least one crossing sample... if (_mm_movemask_epi8(crossSampleMask)) { - // Calculate the corresponding position within the SSE-sample buffer + // Calculate the corresponding position within the SSE buffer __m128i crossSamplePos = _mm_and_si128(relPos, crossSampleMask); // Determine the current first element position of the SSE-sample // buffer in data_ uint64_t firstSamplePos = sseDataIndex << SSE_DATA_SHIFT; - // If the last element position of the SSE-sample buffer is greater - // than the one of the last data_ sample examined (dataIndex)... + // Check if it is possible to start searching for samples crossing + // the threshold in the SSE buffer if ((firstSamplePos + SSE_UPPER_INDEX) > dataIndex) { // Determine the relative position of dataIndex within the - // SSE-sample buffer + // SSE buffer __m128i dataIndexPos = (dataIndex > firstSamplePos) ? _mm_set1_epi8(static_cast(dataIndex - firstSamplePos)) : _mm_set1_epi8(static_cast(0)); + // Get the mask of crossing samples after dataIndex int sseResult = _mm_movemask_epi8( _mm_cmpgt_epi8(crossSamplePos, dataIndexPos)); @@ -514,23 +517,24 @@ boost::optional CZeroSuppression::PerformZeroSuppression( // If there is at least one crossing sample... if (_mm_movemask_epi8(crossSampleMask)) { - // Calculate the corresponding position within the SSE-sample buffer + // Calculate the corresponding position within the SSE buffer __m128i crossSamplePos = _mm_and_si128(relPos, crossSampleMask); // Determine the current first element position of the SSE-sample // buffer in data_ uint64_t firstSamplePos = sseDataIndex << SSE_DATA_SHIFT; - // If the last element position of the SSE-sample buffer is greater - // than the one of the last data_ sample examined (dataIndex)... + // Check if it is possible to start searching for samples crossing + // the threshold in the SSE buffer if ((firstSamplePos + SSE_UPPER_INDEX) > dataIndex) { // Determine the relative position of dataIndex within the - // SSE-sample buffer + // SSE buffer __m128i dataIndexPos = (dataIndex > firstSamplePos) ? _mm_set1_epi8(static_cast(dataIndex - firstSamplePos)) : _mm_set1_epi8(static_cast(0)); + // Get the mask of crossing samples after dataIndex int sseResult = _mm_movemask_epi8( _mm_cmpgt_epi8(crossSamplePos, dataIndexPos)); -- GitLab