diff --git a/Pr/PrPixel/src/VeloClusterTrackingSIMD.cpp b/Pr/PrPixel/src/VeloClusterTrackingSIMD.cpp index 0011314ce37a1d6b464de6b7e53420d2a402b5cd..0e441a7fefa62a2e66545f91e323c4ba0bee1976 100644 --- a/Pr/PrPixel/src/VeloClusterTrackingSIMD.cpp +++ b/Pr/PrPixel/src/VeloClusterTrackingSIMD.cpp @@ -89,7 +89,7 @@ namespace LHCb::Pr::Velo { constexpr int NColumns = 256; } // namespace VPInfos - enum class SearchMode { Default, Fast, Full }; + enum class SearchMode { Default, Full }; template struct SearchModeParam { @@ -758,8 +758,7 @@ namespace LHCb::Pr::Velo { } // h1 } - template - [[gnu::always_inline]] inline void TrackForwarding( const LightTracksSoA* tracks, std::array P2s, + [[gnu::always_inline]] inline void TrackForwarding( const LightTracksSoA* tracks, HitsPlane* P2, LightTracksSoA* tracks_forwarded, LightTracksSoA* tracks_finalized ) const { using simd = SIMDWrapper::best::types; @@ -781,29 +780,27 @@ namespace LHCb::Pr::Velo { const F tx = ( p1.x() - p0.x() ) / ( p1.z() - p0.z() ); const F ty = ( p1.y() - p0.y() ) / ( p1.z() - p0.z() ); - for ( HitsPlane* P2 : P2s ) { - for ( const auto hitP2 : P2->scalar() ) { - const auto sp2 = hitP2.get(); - const auto p2 = LHCb::LinAlg::Vec{ sp2.x(), sp2.y(), sp2.z() }; + for ( const auto hitP2 : P2->scalar() ) { + const auto sp2 = hitP2.get(); + const auto p2 = LHCb::LinAlg::Vec{ sp2.x(), sp2.y(), sp2.z() }; - const F dz = p2.z() - p0.z(); - const F x = tx * dz + p0.x(); - const F y = ty * dz + p0.y(); + const F dz = p2.z() - p0.z(); + const F x = tx * dz + p0.x(); + const F y = ty * dz + p0.y(); - const F dx = x - p2.x(); - const F dy = y - p2.y(); - const F scatter = dx * dx + dy * dy; + const F dx = x - p2.x(); + const F dy = y - p2.y(); + const F scatter = dx * dx + dy * dy; - const auto m_bestfit = mask && ( scatter < bestFit ); + const auto m_bestfit = mask && ( scatter < bestFit ); - if ( none( m_bestfit ) ) continue; + if ( none( m_bestfit ) ) continue; - bestFit = select( m_bestfit, scatter, bestFit ); - bestP2.x() = select( m_bestfit, p2.x(), bestP2.x() ); - bestP2.y() = select( m_bestfit, p2.y(), bestP2.y() ); - bestP2.z() = select( m_bestfit, p2.z(), bestP2.z() ); - bestH2 = select( m_bestfit, I( hitP2.get().cast() ), bestH2 ); - } + bestFit = select( m_bestfit, scatter, bestFit ); + bestP2.x() = select( m_bestfit, p2.x(), bestP2.x() ); + bestP2.y() = select( m_bestfit, p2.y(), bestP2.y() ); + bestP2.z() = select( m_bestfit, p2.z(), bestP2.z() ); + bestH2 = select( m_bestfit, I( hitP2.get().cast() ), bestH2 ); } auto bestH0 = mask && ( bestFit < m_max_scatter_forwarding.value() ); @@ -829,7 +826,7 @@ namespace LHCb::Pr::Velo { auto m_forward = mask && ( skipped < I( max_skips ) ); // Remove used hits - for ( auto P2 : P2s ) removeHits( P2, bestH0, bestH2 ); + removeHits( P2, bestH0, bestH2 ); // Forward tracks { @@ -951,15 +948,43 @@ namespace LHCb::Pr::Velo { } // Tracking buffers - const auto num_P = m_seeding_window.value(); + constexpr auto planes = SearchModeParam::planes; + constexpr auto stride = SearchModeParam::planes_stride; + + const uint64_t missing_planes = ( [&]() -> uint64_t { + if constexpr ( searchMode == SearchMode::Full ) { return m_missing_modules.value(); } + // Magic numbers for deinterleaving + uint64_t even = m_missing_modules.value() & 0x5555555555555555; // Extract even bits (0,2,4,...) + uint64_t odd = m_missing_modules.value() & 0xAAAAAAAAAAAAAAAA; // Extract odd bits (1,3,5,...) + + // Compact the bits by shifting and ORing + even = ( even | ( even >> 1 ) ) & 0x3333333333333333; + even = ( even | ( even >> 2 ) ) & 0x0F0F0F0F0F0F0F0F; + even = ( even | ( even >> 4 ) ) & 0x00FF00FF00FF00FF; + even = ( even | ( even >> 8 ) ) & 0x0000FFFF0000FFFF; + even = ( even | ( even >> 16 ) ) & 0x00000000FFFFFFFF; + + odd = odd >> 1; // Adjust for odd bit positions + + odd = ( odd | ( odd >> 1 ) ) & 0x3333333333333333; + odd = ( odd | ( odd >> 2 ) ) & 0x0F0F0F0F0F0F0F0F; + odd = ( odd | ( odd >> 4 ) ) & 0x00FF00FF00FF00FF; + odd = ( odd | ( odd >> 8 ) ) & 0x0000FFFF0000FFFF; + odd = ( odd | ( odd >> 16 ) ) & 0x00000000FFFFFFFF; + + return even | odd; + } )(); + std::vector raw_P; std::vector P; - raw_P.reserve( num_P ); - P.reserve( num_P ); - for ( unsigned i = 0; i < num_P; i++ ) { + raw_P.reserve( planes ); + P.reserve( planes ); + for ( unsigned i = 0; i < planes; i++ ) { auto& p = raw_P.emplace_back( Zipping::generateZipIdentifier(), LHCb::getMemResource( evtCtx ) ); p.reserve( reserved_hits_inPlane ); P.emplace_back( &p ); + int sensor = ( planes - 1 - i ) * stride; + getHits( VPRawBanks, sensor, sensor + stride, devp, *P[i], hits, m_sensorMasks ); } LightTracksSoA t_candidates{ Zipping::generateZipIdentifier(), LHCb::getMemResource( evtCtx ) }; @@ -971,101 +996,36 @@ namespace LHCb::Pr::Velo { LightTracksSoA *tracks_candidates = &t_candidates, *tracks_forwarded = &t_forwarded; // Do tracking backward - if constexpr ( searchMode == SearchMode::Fast ) { - std::vector raw_P2; - std::vector P2; - raw_P2.reserve( num_P ); - P2.reserve( num_P ); - for ( unsigned i = 0; i < num_P; i++ ) { - auto& p = raw_P2.emplace_back( Zipping::generateZipIdentifier(), LHCb::getMemResource( evtCtx ) ); - p.reserve( reserved_hits_inPlane ); - P2.emplace_back( &p ); - } + { + // Prologue + unsigned last_module_pair = 2; - const int i0 = VPInfos::NPlanes - 1, i1 = VPInfos::NPlanes - 2, i2 = VPInfos::NPlanes - 3; - - // Side 1 - int sensor0 = i0 * VPInfos::NSensorsPerPlane; - int sensor1 = i1 * VPInfos::NSensorsPerPlane; - int sensor2 = i2 * VPInfos::NSensorsPerPlane; - getHits( VPRawBanks, sensor0, sensor0 + VPInfos::NSensorsPerModule, devp, *P[0], hits, - m_sensorMasks ); - getHits( VPRawBanks, sensor1, sensor1 + VPInfos::NSensorsPerModule, devp, *P[1], hits, - m_sensorMasks ); - getHits( VPRawBanks, sensor2, sensor2 + VPInfos::NSensorsPerModule, devp, *P[2], hits, - m_sensorMasks ); - TrackSeeding( P[0], P[1], P[2], tracks_candidates ); - - // Side 2 - sensor0 += VPInfos::NSensorsPerModule; - sensor1 += VPInfos::NSensorsPerModule; - sensor2 += VPInfos::NSensorsPerModule; - - getHits( VPRawBanks, sensor0, sensor0 + VPInfos::NSensorsPerModule, devp, *P2[0], hits, - m_sensorMasks ); - getHits( VPRawBanks, sensor1, sensor1 + VPInfos::NSensorsPerModule, devp, *P2[1], hits, - m_sensorMasks ); - getHits( VPRawBanks, sensor2, sensor2 + VPInfos::NSensorsPerModule, devp, *P2[2], hits, - m_sensorMasks ); - TrackSeeding( P2[0], P2[1], P2[2], tracks_candidates ); - - std::rotate( P.begin(), P.begin() + 1, P.end() ); - std::rotate( P2.begin(), P2.begin() + 1, P2.end() ); - - for ( int p = VPInfos::NPlanes - 4; p >= 0; p-- ) { - const int sensor = p * VPInfos::NSensorsPerPlane; - getHits( VPRawBanks, sensor, sensor + VPInfos::NSensorsPerModule, devp, *P[2], hits, - m_sensorMasks ); - getHits( VPRawBanks, sensor + VPInfos::NSensorsPerModule, - sensor + 2 * VPInfos::NSensorsPerModule, devp, *P2[2], hits, m_sensorMasks ); - - TrackForwarding( tracks_candidates, std::array( { P[2], P2[2] } ), tracks_forwarded, &tracks ); + TrackSeeding( P[last_module_pair - 2], P[last_module_pair - 1], P[last_module_pair], tracks_candidates ); + last_module_pair++; + while ( last_module_pair < planes ) { + TrackForwarding( tracks_candidates, P[last_module_pair], tracks_forwarded, &tracks ); std::swap( tracks_candidates, tracks_forwarded ); - TrackSeeding( P[0], P[1], P[2], tracks_candidates ); - TrackSeeding( P2[0], P2[1], P2[2], tracks_candidates ); - - std::rotate( P.begin(), P.begin() + 1, P.end() ); - std::rotate( P2.begin(), P2.begin() + 1, P2.end() ); - } + unsigned offset = last_module_pair - 1; + uint64_t i_mask = 1 << last_module_pair; + for ( unsigned j = 1; j < last_module_pair; j++ ) { + uint64_t j_mask = 1 << ( offset - j ); + for ( unsigned k = 0; k < j; k++ ) { + uint64_t k_mask = 1 << ( offset - k ); + uint64_t kj_mask = j_mask | k_mask; + uint64_t skipped_mask = ( ( i_mask - 1 ) & ~( ( j_mask << 1 ) - 1 ) ) & ~k_mask; + unsigned window_size = j + 2; - copy_remaining( tracks_candidates, &tracks ); + if ( ( skipped_mask & missing_planes ) != skipped_mask && window_size > m_seeding_window ) continue; - } else { - // Prologue - constexpr auto planes = SearchModeParam::planes; - constexpr auto stride = SearchModeParam::planes_stride; - - int sensor0 = ( planes - 1 ) * stride; - int sensor1 = ( planes - 2 ) * stride; - int sensor2 = ( planes - 3 ) * stride; - getHits( VPRawBanks, sensor0, sensor0 + stride, devp, *P[0], hits, m_sensorMasks ); - getHits( VPRawBanks, sensor1, sensor1 + stride, devp, *P[1], hits, m_sensorMasks ); - getHits( VPRawBanks, sensor2, sensor2 + stride, devp, *P[2], hits, m_sensorMasks ); - - TrackSeeding( P[0], P[1], P[2], tracks_candidates ); - unsigned last_sensor = 2; - if ( last_sensor < num_P - 1 ) - last_sensor++; - else - std::rotate( P.begin(), P.begin() + 1, P.end() ); - - for ( int p = planes - 4; p >= 0; p-- ) { - const int sensor = p * stride; - getHits( VPRawBanks, sensor, sensor + stride, devp, *P[last_sensor], hits, m_sensorMasks ); - TrackForwarding( tracks_candidates, std::array( { P[last_sensor] } ), tracks_forwarded, - &tracks ); - std::swap( tracks_candidates, tracks_forwarded ); + TrackSeeding( P[offset - j], P[offset - k], P[last_module_pair], tracks_candidates ); - for ( unsigned j = 1; j < last_sensor; j++ ) { - for ( unsigned k = 0; k < j; k++ ) { TrackSeeding( P[k], P[j], P[last_sensor], tracks_candidates ); } + if ( ( kj_mask & missing_planes ) == 0 && window_size > m_seeding_window ) goto nextstep; + } } - - if ( last_sensor < num_P - 1 ) - last_sensor++; - else - std::rotate( P.begin(), P.begin() + 1, P.end() ); + nextstep: + last_module_pair++; } copy_remaining( tracks_candidates, &tracks ); @@ -1245,17 +1205,14 @@ namespace LHCb::Pr::Velo { Gaudi::Property m_max_scatter_3hits{ this, "MaxScatter3hits", 0.02f }; Gaudi::Property m_skip_forward{ this, "SkipForward", SearchModeParam::skip_forward_default }; Gaudi::Property m_seeding_window{ this, "SeedingWindow", SearchModeParam::plane_window }; + Gaudi::Property m_missing_modules{ this, "MissingModules", 0 }; }; - using VeloClusterTrackingSIMD = ClusterTrackingSIMD; - using VeloClusterTrackingSIMDFaster = ClusterTrackingSIMD; - using VeloClusterTrackingSIMDFull = ClusterTrackingSIMD; - using VeloRetinaClusterTrackingSIMD = ClusterTrackingSIMD; - using VeloRetinaClusterTrackingSIMDFaster = ClusterTrackingSIMD; - using VeloRetinaClusterTrackingSIMDFull = ClusterTrackingSIMD; + using VeloClusterTrackingSIMD = ClusterTrackingSIMD; + using VeloClusterTrackingSIMDFull = ClusterTrackingSIMD; + using VeloRetinaClusterTrackingSIMD = ClusterTrackingSIMD; + using VeloRetinaClusterTrackingSIMDFull = ClusterTrackingSIMD; DECLARE_COMPONENT_WITH_ID( VeloClusterTrackingSIMD, "VeloClusterTrackingSIMD" ) - DECLARE_COMPONENT_WITH_ID( VeloClusterTrackingSIMDFaster, "VeloClusterTrackingSIMDFaster" ) DECLARE_COMPONENT_WITH_ID( VeloClusterTrackingSIMDFull, "VeloClusterTrackingSIMDFull" ) DECLARE_COMPONENT_WITH_ID( VeloRetinaClusterTrackingSIMD, "VeloRetinaClusterTrackingSIMD" ) - DECLARE_COMPONENT_WITH_ID( VeloRetinaClusterTrackingSIMDFaster, "VeloRetinaClusterTrackingSIMDFaster" ) DECLARE_COMPONENT_WITH_ID( VeloRetinaClusterTrackingSIMDFull, "VeloRetinaClusterTrackingSIMDFull" ) } // namespace LHCb::Pr::Velo