From a9220bacd8b6fe6537fdf3bf48149de1914f967a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20G=C3=BCnther?= Date: Fri, 5 May 2023 13:49:05 +0200 Subject: [PATCH 1/4] Move PrVeloUT into its own namespace and restructure and clean it --- Pr/PrVeloUT/src/PrVeloUT.cpp | 598 ++++++++++++++++------------------- 1 file changed, 281 insertions(+), 317 deletions(-) diff --git a/Pr/PrVeloUT/src/PrVeloUT.cpp b/Pr/PrVeloUT/src/PrVeloUT.cpp index c848a7904e8..51606bef095 100644 --- a/Pr/PrVeloUT/src/PrVeloUT.cpp +++ b/Pr/PrVeloUT/src/PrVeloUT.cpp @@ -1,5 +1,5 @@ -/***************************************************************************** \ -* (c) Copyright 2000-2022 CERN for the benefit of the LHCb Collaboration * +/*****************************************************************************\ +* (c) Copyright 2024 CERN for the benefit of the LHCb Collaboration * * * * This software is distributed under the terms of the GNU General Public * * Licence version 3 (GPL Version 3), copied verbatim in the file "COPYING". * @@ -8,299 +8,179 @@ * granted to it by virtue of its status as an Intergovernmental Organization * * or submit itself to any jurisdiction. * \*****************************************************************************/ + #include "PrUTMagnetTool.h" -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -/** @class PrVeloUT PrVeloUT.h - * - * PrVeloUT algorithm. This is just a wrapper, - * the actual pattern recognition is done in the 'PrVeloUTTool'. - * - * - InputTracksName: Input location for Velo tracks - * - OutputTracksName: Output location for VeloTT tracks - * - TimingMeasurement: Do a timing measurement? - * - * @author Mariusz Witek - * @date 2007-05-08 - * @update for A-Team framework 2007-08-20 SHM - * - * 2017-03-01: Christoph Hasse (adapt to future framework) - * 2019-04-26: Arthur Hennequin (change data Input/Output) - * 2020-08-26: Peilian Li (change data Input/Output to SOACollection) - */ - -namespace LHCb::Pr { - - using simd = SIMDWrapper::best::types; - using scalar = SIMDWrapper::scalar::types; - - constexpr auto nanMomentum = std::numeric_limits::quiet_NaN(); - - constexpr static int batchSize = 2 * simd::size; - const int totalUTLayers = static_cast( UTInfo::DetectorNumbers::TotalLayers ); - - struct ProtoTracks final { - - std::array wbs; - std::array xMidFields; - std::array invKinkVeloDists; - - // -- this is for the hits - // -- this does _not_ include overlap hits, so only 4 per track - std::array xs; - std::array zs; - std::array weightss{}; // this needs to be zero-initialized - std::array sins; - std::array ids; - std::array hitIndexs; - - // -- this is the output of the fit - std::array qps; - std::array chi2TTs; - std::array xTTs; - std::array xSlopeTTs; - std::array ys; - - // -- and this the original state (in the Velo) - std::array statePoss; - std::array stateDirs; - std::array ancestorIndexs; - - // -- and this an index to find the hit containers - std::array hitContIndexs; - - std::size_t size{0}; - SOA_ACCESSOR_VAR( x, &( xs[pos * batchSize] ), int pos ) - SOA_ACCESSOR_VAR( z, &( zs[pos * batchSize] ), int pos ) - SOA_ACCESSOR_VAR( weight, &( weightss[pos * batchSize] ), int pos ) - SOA_ACCESSOR_VAR( sin, &( sins[pos * batchSize] ), int pos ) - SOA_ACCESSOR_VAR( id, &( ids[pos * batchSize] ), int pos ) - SOA_ACCESSOR_VAR( hitIndex, &( hitIndexs[pos * batchSize] ), int pos ) - - SOA_ACCESSOR( qp, qps.data() ) - SOA_ACCESSOR( chi2TT, chi2TTs.data() ) - SOA_ACCESSOR( xTT, xTTs.data() ) - SOA_ACCESSOR( xSlopeTT, xSlopeTTs.data() ) - SOA_ACCESSOR( y, ys.data() ) - - SOA_ACCESSOR( ancestorIndex, ancestorIndexs.data() ) - SOA_ACCESSOR( hitContIndex, hitContIndexs.data() ) - VEC3_SOA_ACCESSOR( pos, (float*)&( statePoss[0] ), (float*)&( statePoss[batchSize] ), - (float*)&( statePoss[2 * batchSize] ) ) - VEC3_XY_SOA_ACCESSOR( dir, (float*)&( stateDirs[0] ), (float*)&( stateDirs[batchSize] ), 1.0f ) - - SOA_ACCESSOR( wb, wbs.data() ) - SOA_ACCESSOR( xMidField, xMidFields.data() ) - SOA_ACCESSOR( invKinkVeloDist, invKinkVeloDists.data() ) - - void initTracks( int indexVal, float maxPseudoChi2 ) { - hitIndexs.fill( indexVal ); - chi2TTs.fill( maxPseudoChi2 ); - size = 0; - } - template - void fillHelperParams( Vec3 pos, Vec3 dir, const float zKink, - const float sigmaVeloSlope ) { +#include +#include +#include +#include + +#include "Gaudi/Accumulators.h" +#include "GaudiKernel/EventContext.h" +#include "GaudiKernel/SystemOfUnits.h" + +#include "DetDesc/GenericConditionAccessorHolder.h" +#include "Magnet/DeMagnet.h" +#include "UTDet/DeUTDetector.h" + +#include "Event/PrHits.h" +#include "Event/PrUpstreamTracks.h" +#include "Event/PrVeloTracks.h" +#include "Event/UTSectorHelper.h" +#include "Event/UTTrackUtils.h" +#include "Event/ZipUtils.h" +#include "Kernel/STLExtensions.h" +#include "LHCbAlgs/Transformer.h" +#include "LHCbMath/GeomFun.h" +#include "LHCbMath/SIMDWrapper.h" + +#include "PrKernel/PrMutUTHits.h" +#include "UTDAQ/UTDAQHelper.h" +#include "UTDAQ/UTInfo.h" + +#include "vdt/sqrt.h" + +namespace LHCb::Pr::VeloUT { + namespace { + using simd = SIMDWrapper::best::types; + using scalar = SIMDWrapper::scalar::types; + using TracksTag = Upstream::Tag; + namespace HitTag = UT::Mut::HitTag; + + constexpr auto nanMomentum = std::numeric_limits::quiet_NaN(); + constexpr auto batchSize = 2 * simd::size; + constexpr auto totalUTLayers = static_cast( UTInfo::DetectorNumbers::TotalLayers ); + + struct ProtoTracks final { + + std::array wbs; + std::array xMidFields; + std::array invKinkVeloDists; + + // -- this is for the hits + // -- this does _not_ include overlap hits, so only 4 per track + std::array xs; + std::array zs; + std::array weightss{}; // this needs to be zero-initialized + std::array sins; + std::array ids; + std::array hitIndexs; + + // -- this is the output of the fit + std::array qps; + std::array chi2TTs; + std::array xTTs; + std::array xSlopeTTs; + std::array ys; + + // -- and this the original state (in the Velo) + std::array statePoss; + std::array stateDirs; + std::array ancestorIndexs; + + // -- and this an index to find the hit containers + std::array hitContIndexs; + + std::size_t size{0}; + SOA_ACCESSOR_VAR( x, &( xs[pos * batchSize] ), int pos ) + SOA_ACCESSOR_VAR( z, &( zs[pos * batchSize] ), int pos ) + SOA_ACCESSOR_VAR( weight, &( weightss[pos * batchSize] ), int pos ) + SOA_ACCESSOR_VAR( sin, &( sins[pos * batchSize] ), int pos ) + SOA_ACCESSOR_VAR( id, &( ids[pos * batchSize] ), int pos ) + SOA_ACCESSOR_VAR( hitIndex, &( hitIndexs[pos * batchSize] ), int pos ) + + SOA_ACCESSOR( qp, qps.data() ) + SOA_ACCESSOR( chi2TT, chi2TTs.data() ) + SOA_ACCESSOR( xTT, xTTs.data() ) + SOA_ACCESSOR( xSlopeTT, xSlopeTTs.data() ) + SOA_ACCESSOR( y, ys.data() ) + + SOA_ACCESSOR( ancestorIndex, ancestorIndexs.data() ) + SOA_ACCESSOR( hitContIndex, hitContIndexs.data() ) + VEC3_SOA_ACCESSOR( pos, (float*)&( statePoss[0] ), (float*)&( statePoss[batchSize] ), + (float*)&( statePoss[2 * batchSize] ) ) + VEC3_XY_SOA_ACCESSOR( dir, (float*)&( stateDirs[0] ), (float*)&( stateDirs[batchSize] ), 1.0f ) + + SOA_ACCESSOR( wb, wbs.data() ) + SOA_ACCESSOR( xMidField, xMidFields.data() ) + SOA_ACCESSOR( invKinkVeloDist, invKinkVeloDists.data() ) + + void initTracks( int indexVal, float maxPseudoChi2 ) { + hitIndexs.fill( indexVal ); + chi2TTs.fill( maxPseudoChi2 ); + size = 0; + } + + template + void fillHelperParams( Vec3 pos, Vec3 dir, const float zKink, + const float sigmaVeloSlope ) { - using F = typename dType::float_v; + using F = typename dType::float_v; - F( pos.x + dir.x * ( zKink - pos.z ) ).store( xMidFields.data() ); - F a = sigmaVeloSlope * ( zKink - pos.z ); - F( 1.0f / ( a * a ) ).store( wbs.data() ); - F( 1.0f / ( zKink - pos.z ) ).store( invKinkVeloDists.data() ); - } + F( pos.x + dir.x * ( zKink - pos.z ) ).store( xMidFields.data() ); + F a = sigmaVeloSlope * ( zKink - pos.z ); + F( 1.0f / ( a * a ) ).store( wbs.data() ); + F( 1.0f / ( zKink - pos.z ) ).store( invKinkVeloDists.data() ); + } - template - void storeSimpleFitInfo( typename dType::float_v chi2TT, typename dType::float_v qp, typename dType::float_v xTT, - typename dType::float_v xSlopeTT, unsigned int trackIndex ) { + template + void storeSimpleFitInfo( typename dType::float_v chi2TT, typename dType::float_v qp, typename dType::float_v xTT, + typename dType::float_v xSlopeTT, unsigned int trackIndex ) { - using F = typename dType::float_v; + using F = typename dType::float_v; - F( chi2TT ).store( &chi2TTs[trackIndex] ); - F( qp ).store( &qps[trackIndex] ); - F( xTT ).store( &xTTs[trackIndex] ); - F( xSlopeTT ).store( &xSlopeTTs[trackIndex] ); - } + F( chi2TT ).store( &chi2TTs[trackIndex] ); + F( qp ).store( &qps[trackIndex] ); + F( xTT ).store( &xTTs[trackIndex] ); + F( xSlopeTT ).store( &xSlopeTTs[trackIndex] ); + } - // -- this runs over all 4 layers, even if no hit was found - // -- but it fills a weight of 0 - // -- Note: These are not "physical" layers, as the hits are ordered such that only - // -- the last one can be not filled. - template - void fillHitInfo( const LHCb::Event::Zip& hitsInL, - unsigned int trackIndex ) { - - using F = typename dType::float_v; - using I = typename dType::int_v; - auto nValid{0}; - if ( hitsInL.size() != 0 ) { - for ( int i = 0; i < totalUTLayers; ++i ) { - int hitI = hitIndexs[trackIndex + i * batchSize]; - const auto valid = ( hitI != -1 ); - const auto weight = valid ? hitsInL[hitI].weight().cast() : 0.0f; - nValid += valid; - hitI = std::max( 0, hitI ); - LHCb::LHCbID id( LHCb::Detector::UT::ChannelID( hitsInL[hitI].channelID().cast() ) ); - - F( weight ).store( &weightss[trackIndex + i * batchSize] ); - F( hitsInL[hitI].x() ).store( &xs[trackIndex + i * batchSize] ); - F( hitsInL[hitI].z() ).store( &zs[trackIndex + i * batchSize] ); - F( hitsInL[hitI].sin() ).store( &sins[trackIndex + i * batchSize] ); - I( hitsInL[hitI].index() ).store( &hitIndexs[trackIndex + i * batchSize] ); - I( id.lhcbID() ).store( &ids[trackIndex + i * batchSize] ); + // -- this runs over all 4 layers, even if no hit was found + // -- but it fills a weight of 0 + // -- Note: These are not "physical" layers, as the hits are ordered such that only + // -- the last one can be not filled. + template + void fillHitInfo( const LHCb::Event::Zip& hitsInL, + unsigned int trackIndex ) { + + using F = typename dType::float_v; + using I = typename dType::int_v; + auto nValid{0}; + if ( hitsInL.size() != 0 ) { + for ( int i = 0; i < totalUTLayers; ++i ) { + int hitI = hitIndexs[trackIndex + i * batchSize]; + const auto valid = ( hitI != -1 ); + const auto weight = valid ? hitsInL[hitI].weight().cast() : 0.0f; + nValid += valid; + hitI = std::max( 0, hitI ); + LHCb::LHCbID id( LHCb::Detector::UT::ChannelID( hitsInL[hitI].channelID().cast() ) ); + + F( weight ).store( &weightss[trackIndex + i * batchSize] ); + F( hitsInL[hitI].x() ).store( &xs[trackIndex + i * batchSize] ); + F( hitsInL[hitI].z() ).store( &zs[trackIndex + i * batchSize] ); + F( hitsInL[hitI].sin() ).store( &sins[trackIndex + i * batchSize] ); + I( hitsInL[hitI].index() ).store( &hitIndexs[trackIndex + i * batchSize] ); + I( id.lhcbID() ).store( &ids[trackIndex + i * batchSize] ); + } } + // this is useful in filterMode + if ( !nValid ) { F( nanMomentum ).store( &qps[trackIndex] ); } } - // this is useful in filterMode - if ( !nValid ) { F( nanMomentum ).store( &qps[trackIndex] ); } - } - }; + }; - namespace { - struct VeloUTGeomCache final { - VeloUTGeomCache( const DeUTDetector& det, const UTMagnetTool::Cache& magtoolcache ) : common( det ) { + struct VeloUTGeomCache { + VeloUTGeomCache( const DeUTDetector& det, const UTMagnetTool::Cache& magtoolcache ) : common{det} { // m_zMidUT is a position of normalization plane which should to be close to z middle of UT ( +- 5 cm ). // Cached once in VeloUTTool at initialization. No need to update with small UT movement. zMidUT = magtoolcache.zCenterUT; // zMidField and distToMomentum is properly recalculated in PrUTMagnetTool when B field changes distToMomentum = 1.0f / magtoolcache.dist2mom; } - VeloUTGeomCache() = default; UTDAQ::GeomCache common; - float zMidUT{0.}, distToMomentum{0.}; + float zMidUT{0.f}; + float distToMomentum{0.f}; }; - } // namespace - - class VeloUT - : public LHCb::Algorithm::Transformer> { - - public: - /// Standard constructor - using base_class_t = - LHCb::Algorithm::Transformer>; - - VeloUT( const std::string& name, ISvcLocator* pSvcLocator ) - : base_class_t( name, pSvcLocator, - {KeyValue{"InputTracksName", "Rec/Track/Velo"}, KeyValue{"UTHits", UTInfo::HitLocation}, - KeyValue{"GeometryInfo", "AlgorithmSpecific-" + name + "-UTGeometryInfo"}, - KeyValue{"Magnet", LHCb::Det::Magnet::det_path}}, - KeyValue{"OutputTracksName", "Rec/Track/UT"} ) {} - - StatusCode initialize() override { - return Transformer::initialize().andThen( [&] { - addConditionDerivation( {DeUTDetLocation::location(), m_PrUTMagnetTool->cacheLocation()}, - inputLocation(), - []( const DeUTDetector& det, const UTMagnetTool::Cache& magtoolcache ) { - return VeloUTGeomCache( det, magtoolcache ); - } ); - } ); - } - - Upstream::Tracks operator()( const EventContext&, const Velo::Tracks&, const LHCb::Pr::UT::Hits&, - const VeloUTGeomCache&, const DeMagnet& ) const override final; - - private: - Gaudi::Property m_minMomentum{this, "MinMomentum", 1500.f * Gaudi::Units::MeV}; - Gaudi::Property m_minPT{this, "MinPT", 300.f * Gaudi::Units::MeV}; - Gaudi::Property m_minMomentumFinal{this, "MinMomentumFinal", 2500.f * Gaudi::Units::MeV}; - Gaudi::Property m_minPTFinal{this, "MinPTFinal", 425.f * Gaudi::Units::MeV}; - Gaudi::Property m_maxPseudoChi2{this, "MaxPseudoChi2", 1280.}; - Gaudi::Property m_yTol{this, "YTolerance", 0.5 * Gaudi::Units::mm}; // 0.8 - Gaudi::Property m_yTolSlope{this, "YTolSlope", 0.08}; // 0.2 - Gaudi::Property m_hitTol{this, "HitTol", 0.8 * Gaudi::Units::mm}; // 0.8 - Gaudi::Property m_deltaTx{this, "DeltaTx", 0.018}; // 0.02 - Gaudi::Property m_maxXSlope{this, "MaxXSlope", 0.350}; - Gaudi::Property m_maxYSlope{this, "MaxYSlope", 0.300}; - Gaudi::Property m_centralHoleSize{this, "CentralHoleSize", 33. * Gaudi::Units::mm}; - Gaudi::Property m_intraLayerDist{this, "IntraLayerDist", 15.0 * Gaudi::Units::mm}; - Gaudi::Property m_overlapTol{this, "OverlapTol", 0.5 * Gaudi::Units::mm}; - Gaudi::Property m_passHoleSize{this, "PassHoleSize", 40. * Gaudi::Units::mm}; - Gaudi::Property m_LD3Hits{this, "LD3HitsMin", -0.5}; - Gaudi::Property m_LD4Hits{this, "LD4HitsMin", -0.5}; - Gaudi::Property m_minLayers{this, "MinLayers", 3}; - - Gaudi::Property m_printVariables{this, "PrintVariables", false}; - Gaudi::Property m_passTracks{this, "PassTracks", false}; - Gaudi::Property m_finalFit{this, "FinalFit", true}; - Gaudi::Property m_fiducialCuts{this, "FiducialCuts", true}; - Gaudi::Property m_looseSectorSearch{this, "LooseSectorSearch", false}; - Gaudi::Property m_filterMode{this, "FilterMode", false}; - - mutable Gaudi::Accumulators::SummingCounter m_seedsCounter{this, "#seeds"}; - mutable Gaudi::Accumulators::SummingCounter m_tracksCounter{this, "#tracks"}; - - LHCb::UT::TrackUtils::MiniStates getStates( const Velo::Tracks& inputTracks, Upstream::Tracks& outputTracks, - float zMidUT ) const; - - template - bool getHitsScalar( const LHCb::Pr::UT::Hits& hh, const LHCb::UT::TrackUtils::MiniStates& filteredStates, - const std::array& compBoundsArray, LHCb::Pr::UT::Mut::Hits& hitsInLayers, - const std::size_t t, float zMidUT, int minLayers = totalUTLayers - 1 ) const; - - inline void findHits( const LHCb::Pr::UT::Hits& hh, const simd::float_v& yProto, const simd::float_v& ty, - const simd::float_v& tx, const simd::float_v& xOnTrackProto, const simd::float_v& tolProto, - const simd::float_v& xTolNormFact, LHCb::Pr::UT::Mut::Hits& mutHits, - const simd::float_v& yTol, const int firstIndex, const int lastIndex ) const; - - template - bool formClusters( const LHCb::Pr::UT::Mut::Hits& hitsInLayers, ProtoTracks& pTracks, const int trackIndex, - float zMidUT ) const; - - template - void prepareOutputTrackSIMD( const ProtoTracks& protoTracks, - const std::array& hitsInLayers, - Upstream::Tracks& outputTracks, const Velo::Tracks& inputTracks, - const BdlTable& bdlTable, const DeMagnet& magnet, float zMidUT ) const; - - /// Multipurpose tool for Bdl and deflection - ToolHandle m_PrUTMagnetTool{this, "PrUTMagnetTool", "PrUTMagnetTool"}; - - mutable Gaudi::Accumulators::MsgCounter m_too_much_in_filtered{ - this, "Reached the maximum number of tracks in filteredStates!!"}; - mutable Gaudi::Accumulators::MsgCounter m_too_much_in_boundaries{ - this, "Reached the maximum number of tracks in Boundaries!!"}; - - constexpr static float c_zKink{1780.0}; - constexpr static float c_sigmaVeloSlope{0.10 * Gaudi::Units::mrad}; - constexpr static float c_invSigmaVeloSlope{10.0 / Gaudi::Units::mrad}; - }; -} // namespace LHCb::Pr - -//----------------------------------------------------------------------------- -// Implementation file for class : PrVeloUT -// -// 2007-05-08: Mariusz Witek -// 2017-03-01: Christoph Hasse (adapt to future framework) -// 2019-04-26: Arthur Hennequin (change data Input/Output) -//----------------------------------------------------------------------------- - -// Declaration of the Algorithm Factory -DECLARE_COMPONENT_WITH_ID( LHCb::Pr::VeloUT, "PrVeloUT" ) - -using TracksTag = LHCb::Pr::Upstream::Tag; -namespace HitTag = LHCb::Pr::UT::Mut::HitTag; -namespace LHCb::Pr { - namespace { simd::mask_v CholeskyDecomposition3( const std::array& mat, std::array& rhs ) { // -- copied from Root::Math::CholeskyDecomp @@ -334,17 +214,16 @@ namespace LHCb::Pr { return mask; } - // -- parameters that describe the z position of the kink point as a function of ty in a 4th order polynomial (even - // terms only) - constexpr auto magFieldParams = std::array{2010.0f, -2240.0f, -71330.f}; - // perform a fit using trackhelper's best hits with y correction, improve qop estimate simd::float_v fastfitterSIMD( std::array& improvedParams, const ProtoTracks& protoTracks, const float zMidUT, const simd::float_v qpxz2p, const int t, simd::mask_v& goodFitMask ) { - - const Vec3 pos = protoTracks.pos( t ); - const Vec3 dir = protoTracks.dir( t ); + // -- parameters that describe the z position of the kink point as a function of ty in a 4th order polynomial + // (even + // terms only) + constexpr auto magFieldParams = std::array{2010.0f, -2240.0f, -71330.f}; + const Vec3 pos = protoTracks.pos( t ); + const Vec3 dir = protoTracks.dir( t ); const simd::float_v x = pos.x; const simd::float_v y = pos.y; @@ -440,11 +319,6 @@ namespace LHCb::Pr { coeffs[2] * log( inputValues[1] ) + coeffs[3] * log( inputValues[2] ); } - /* - simd::float_v calcXTol( const simd::float_v minMom, const simd::float_v ty ) { - return ( 38000.0f / minMom + 0.25f ) * ( 1.0f + ty * ty * 0.8f ); - } - */ // -------------------------------------------------------------------- // -- Helper function to calculate the planeCode: 0 - 1 - 2 - 3 // -------------------------------------------------------------------- @@ -476,10 +350,6 @@ namespace LHCb::Pr { return ( index3 * 11 + index2 ) * 31 + index1; } - constexpr auto minValsBdl = std::array{-0.3f, -250.0f, 0.0f}; - constexpr auto maxValsBdl = std::array{0.3f, 250.0f, 800.0f}; - constexpr auto deltaBdl = std::array{0.02f, 50.0f, 80.0f}; - // constexpr auto dxDyHelper = std::array{0.0f, 1.0f, -1.0f, 0.0f}; // =========================================================================================== // -- 2 helper functions for fit // -- Pseudo chi2 fit, templated for 3 or 4 hits @@ -497,6 +367,7 @@ namespace LHCb::Pr { rhs[0] += wi * ui; rhs[1] += wi * ui * dz; } + template inline __attribute__( ( always_inline ) ) void simpleFit( const std::array& indices, const LHCb::Pr::UT::Mut::Hits& hits, ProtoTracks& pTracks, @@ -555,12 +426,103 @@ namespace LHCb::Pr { } } // namespace - //============================================================================= - // Main execution - //============================================================================= - Upstream::Tracks VeloUT::operator()( const EventContext& evtCtx, const Velo::Tracks& inputTracks, - const LHCb::Pr::UT::Hits& hh, const VeloUTGeomCache& velogeom, - const DeMagnet& magnet ) const { + class PrVeloUT + : public Algorithm::Transformer> { + + public: + PrVeloUT( const std::string& name, ISvcLocator* pSvcLocator ) + : Transformer( name, pSvcLocator, + {KeyValue{"InputTracksName", "Rec/Track/Velo"}, KeyValue{"UTHits", UTInfo::HitLocation}, + KeyValue{"GeometryInfo", "AlgorithmSpecific-" + name + "-UTGeometryInfo"}, + KeyValue{"Magnet", Det::Magnet::det_path}}, + KeyValue{"OutputTracksName", "Rec/Track/UT"} ) {} + + StatusCode initialize() override { + return Transformer::initialize().andThen( [&] { + addConditionDerivation( {DeUTDetLocation::location(), m_PrUTMagnetTool->cacheLocation()}, + inputLocation(), + []( const DeUTDetector& det, const UTMagnetTool::Cache& magtoolcache ) { + return VeloUTGeomCache( det, magtoolcache ); + } ); + } ); + } + + Upstream::Tracks operator()( const EventContext&, const Velo::Tracks&, const LHCb::Pr::UT::Hits&, + const VeloUTGeomCache&, const DeMagnet& ) const override final; + + private: + Gaudi::Property m_minMomentum{this, "MinMomentum", 1500.f * Gaudi::Units::MeV}; + Gaudi::Property m_minPT{this, "MinPT", 300.f * Gaudi::Units::MeV}; + Gaudi::Property m_minMomentumFinal{this, "MinMomentumFinal", 2500.f * Gaudi::Units::MeV}; + Gaudi::Property m_minPTFinal{this, "MinPTFinal", 425.f * Gaudi::Units::MeV}; + Gaudi::Property m_maxPseudoChi2{this, "MaxPseudoChi2", 1280.}; + Gaudi::Property m_yTol{this, "YTolerance", 0.5 * Gaudi::Units::mm}; // 0.8 + Gaudi::Property m_yTolSlope{this, "YTolSlope", 0.08}; // 0.2 + Gaudi::Property m_hitTol{this, "HitTol", 0.8 * Gaudi::Units::mm}; // 0.8 + Gaudi::Property m_deltaTx{this, "DeltaTx", 0.018}; // 0.02 + Gaudi::Property m_maxXSlope{this, "MaxXSlope", 0.350}; + Gaudi::Property m_maxYSlope{this, "MaxYSlope", 0.300}; + Gaudi::Property m_centralHoleSize{this, "CentralHoleSize", 33. * Gaudi::Units::mm}; + Gaudi::Property m_intraLayerDist{this, "IntraLayerDist", 15.0 * Gaudi::Units::mm}; + Gaudi::Property m_overlapTol{this, "OverlapTol", 0.5 * Gaudi::Units::mm}; + Gaudi::Property m_passHoleSize{this, "PassHoleSize", 40. * Gaudi::Units::mm}; + Gaudi::Property m_LD3Hits{this, "LD3HitsMin", -0.5}; + Gaudi::Property m_LD4Hits{this, "LD4HitsMin", -0.5}; + Gaudi::Property m_minLayers{this, "MinLayers", 3}; + + Gaudi::Property m_printVariables{this, "PrintVariables", false}; + Gaudi::Property m_passTracks{this, "PassTracks", false}; + Gaudi::Property m_finalFit{this, "FinalFit", true}; + Gaudi::Property m_fiducialCuts{this, "FiducialCuts", true}; + Gaudi::Property m_filterMode{this, "FilterMode", false}; + + mutable Gaudi::Accumulators::SummingCounter m_seedsCounter{this, "#seeds"}; + mutable Gaudi::Accumulators::SummingCounter m_tracksCounter{this, "#tracks"}; + + LHCb::UT::TrackUtils::MiniStates getStates( const Velo::Tracks& inputTracks, Upstream::Tracks& outputTracks, + float zMidUT ) const; + + bool getHitsScalar( const LHCb::Pr::UT::Hits& hh, const LHCb::UT::TrackUtils::MiniStates& filteredStates, + const std::array& compBoundsArray, + LHCb::Pr::UT::Mut::Hits& hitsInLayers, const std::size_t t, float zMidUT, + int minLayers = totalUTLayers - 1 ) const; + + inline void findHits( const LHCb::Pr::UT::Hits& hh, const simd::float_v& yProto, const simd::float_v& ty, + const simd::float_v& tx, const simd::float_v& xOnTrackProto, const simd::float_v& tolProto, + const simd::float_v& xTolNormFact, LHCb::Pr::UT::Mut::Hits& mutHits, + const simd::float_v& yTol, const int firstIndex, const int lastIndex ) const; + + template + bool formClusters( const LHCb::Pr::UT::Mut::Hits& hitsInLayers, ProtoTracks& pTracks, const int trackIndex, + float zMidUT ) const; + + template + void prepareOutputTrackSIMD( const ProtoTracks& protoTracks, + const std::array& hitsInLayers, + Upstream::Tracks& outputTracks, const Velo::Tracks& inputTracks, + const BdlTable& bdlTable, const DeMagnet& magnet, float zMidUT ) const; + + /// Multipurpose tool for Bdl and deflection + ToolHandle m_PrUTMagnetTool{this, "PrUTMagnetTool", "PrUTMagnetTool"}; + + mutable Gaudi::Accumulators::MsgCounter m_too_much_in_filtered{ + this, "Reached the maximum number of tracks in filteredStates!!"}; + mutable Gaudi::Accumulators::MsgCounter m_too_much_in_boundaries{ + this, "Reached the maximum number of tracks in Boundaries!!"}; + + constexpr static float c_zKink{1780.0}; + constexpr static float c_sigmaVeloSlope{0.10 * Gaudi::Units::mrad}; + constexpr static float c_invSigmaVeloSlope{10.0 / Gaudi::Units::mrad}; + }; + + // Declaration of the Algorithm Factory + DECLARE_COMPONENT_WITH_ID( PrVeloUT, "PrVeloUT" ) + + Upstream::Tracks PrVeloUT::operator()( const EventContext& evtCtx, const Velo::Tracks& inputTracks, + const LHCb::Pr::UT::Hits& hh, const VeloUTGeomCache& velogeom, + const DeMagnet& magnet ) const { Upstream::Tracks outputTracks{&inputTracks, Zipping::generateZipIdentifier(), LHCb::getMemResource( evtCtx )}; outputTracks.reserve( inputTracks.size() ); @@ -688,11 +650,12 @@ namespace LHCb::Pr { m_tracksCounter += outputTracks.size(); return outputTracks; } + //============================================================================= // Get the state, do some cuts //============================================================================= __attribute__( ( flatten ) ) LHCb::UT::TrackUtils::MiniStates - VeloUT::getStates( const Velo::Tracks& inputTracks, Upstream::Tracks& outputTracks, float zMidUT ) const { + PrVeloUT::getStates( const Velo::Tracks& inputTracks, Upstream::Tracks& outputTracks, float zMidUT ) const { LHCb::UT::TrackUtils::MiniStates filteredStates{Zipping::generateZipIdentifier(), {inputTracks.get_allocator().resource()}}; @@ -748,10 +711,10 @@ namespace LHCb::Pr { //============================================================================= template inline __attribute__( ( always_inline ) ) bool - VeloUT::getHitsScalar( const LHCb::Pr::UT::Hits& hh, const LHCb::UT::TrackUtils::MiniStates& filteredStates, - const std::array& compBoundsArray, - LHCb::Pr::UT::Mut::Hits& hitsInLayers, const std::size_t t, float zMidUT, - int minLayers ) const { + PrVeloUT::getHitsScalar( const LHCb::Pr::UT::Hits& hh, const LHCb::UT::TrackUtils::MiniStates& filteredStates, + const std::array& compBoundsArray, + LHCb::Pr::UT::Mut::Hits& hitsInLayers, const std::size_t t, float zMidUT, + int minLayers ) const { // -- This is for some sanity checks later [[maybe_unused]] const int maxNumSectors = m_looseSectorSearch @@ -840,10 +803,10 @@ namespace LHCb::Pr { // -- Method that finds the hits in a given layer within a certain range // ============================================================================== inline __attribute__( ( always_inline ) ) void - VeloUT::findHits( const LHCb::Pr::UT::Hits& hh, const simd::float_v& yProto, const simd::float_v& ty, - const simd::float_v& tx, const simd::float_v& xOnTrackProto, const simd::float_v& tolProto, - const simd::float_v& xTolNormFact, LHCb::Pr::UT::Mut::Hits& mutHits, const simd::float_v& yTol, - const int firstIndex, const int lastIndex ) const { + PrVeloUT::findHits( const LHCb::Pr::UT::Hits& hh, const simd::float_v& yProto, const simd::float_v& ty, + const simd::float_v& tx, const simd::float_v& xOnTrackProto, const simd::float_v& tolProto, + const simd::float_v& xTolNormFact, LHCb::Pr::UT::Mut::Hits& mutHits, const simd::float_v& yTol, + const int firstIndex, const int lastIndex ) const { const auto& myHits = hh; const auto myHs = myHits.simd(); @@ -887,9 +850,9 @@ namespace LHCb::Pr { // Form clusters //========================================================================= template - inline __attribute__( ( always_inline ) ) bool VeloUT::formClusters( const LHCb::Pr::UT::Mut::Hits& hitsInLayers, - ProtoTracks& pTracks, const int trackIndex, - float zMidUT ) const { + inline __attribute__( ( always_inline ) ) bool PrVeloUT::formClusters( const LHCb::Pr::UT::Mut::Hits& hitsInLayers, + ProtoTracks& pTracks, const int trackIndex, + float zMidUT ) const { const int begin0 = forward ? hitsInLayers.layerIndices[0] : hitsInLayers.layerIndices[3]; const int end0 = forward ? hitsInLayers.layerIndices[1] : hitsInLayers.size(); @@ -986,10 +949,10 @@ namespace LHCb::Pr { //========================================================================= template inline __attribute__( ( always_inline ) ) __attribute__( ( flatten ) ) void - VeloUT::prepareOutputTrackSIMD( const ProtoTracks& protoTracks, - const std::array& hitsInLayers, - Upstream::Tracks& outputTracks, const Velo::Tracks& inputTracks, - const BdlTable& bdlTable, const DeMagnet& magnet, float zMidUT ) const { + PrVeloUT::prepareOutputTrackSIMD( const ProtoTracks& protoTracks, + const std::array& hitsInLayers, + Upstream::Tracks& outputTracks, const Velo::Tracks& inputTracks, + const BdlTable& bdlTable, const DeMagnet& magnet, float zMidUT ) const { auto const velozipped = inputTracks.simd(); const auto pSize = protoTracks.size; @@ -1005,8 +968,6 @@ namespace LHCb::Pr { // -- this is to filter tracks where the fit had a too large chi2 simd::mask_v fourHitTrack = protoTracks.weight( t, 3 ) > 0.0001f; - // const float bdl1 = m_PrUTMagnetTool->bdlIntegral(helper.state.ty,zOrigin,helper.state.z); - // -- These are calculations, copied and simplified from PrTableForFunction // -- FIXME: these rely on the internal details of PrTableForFunction!!! // and should at least be put back in there, and used from here @@ -1025,6 +986,9 @@ namespace LHCb::Pr { gather( bdlTable.table().data(), masterIndexSIMD( index1, index2 + 1, index3 ) ), gather( bdlTable.table().data(), masterIndexSIMD( index1, index2, index3 + 1 ) )}; + constexpr auto minValsBdl = std::array{-0.3f, -250.0f, 0.0f}; + constexpr auto maxValsBdl = std::array{0.3f, 250.0f, 800.0f}; + constexpr auto deltaBdl = std::array{0.02f, 50.0f, 80.0f}; const std::array boundaries = {-0.3f + simd::float_v{index1} * deltaBdl[0], -250.0f + simd::float_v{index2} * deltaBdl[1], 0.0f + simd::float_v{index3} * deltaBdl[2]}; @@ -1215,4 +1179,4 @@ namespace LHCb::Pr { } } // loop on t } -} // namespace LHCb::Pr +} // namespace LHCb::Pr::VeloUT -- GitLab From e5a5a693a4a53077ba54637b8e43ba5423c2e0bd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20G=C3=BCnther?= Date: Fri, 5 May 2023 14:17:11 +0200 Subject: [PATCH 2/4] Use SOACollection for ProtoTracks in PrVeloUT --- Pr/PrVeloUT/src/PrVeloUT.cpp | 806 +++++++++++++++-------------------- 1 file changed, 351 insertions(+), 455 deletions(-) diff --git a/Pr/PrVeloUT/src/PrVeloUT.cpp b/Pr/PrVeloUT/src/PrVeloUT.cpp index 51606bef095..935980bfccd 100644 --- a/Pr/PrVeloUT/src/PrVeloUT.cpp +++ b/Pr/PrVeloUT/src/PrVeloUT.cpp @@ -27,6 +27,7 @@ #include "Event/PrHits.h" #include "Event/PrUpstreamTracks.h" #include "Event/PrVeloTracks.h" +#include "Event/SOACollection.h" #include "Event/UTSectorHelper.h" #include "Event/UTTrackUtils.h" #include "Event/ZipUtils.h" @@ -43,130 +44,69 @@ namespace LHCb::Pr::VeloUT { namespace { - using simd = SIMDWrapper::best::types; - using scalar = SIMDWrapper::scalar::types; - using TracksTag = Upstream::Tag; + using simd = SIMDWrapper::best::types; + using scalar = SIMDWrapper::scalar::types; + using TracksTag = Upstream::Tag; + namespace HitTag = UT::Mut::HitTag; + namespace TU = LHCb::UT::TrackUtils; + constexpr auto EndVelo = Event::Enum::State::Location::EndVelo; constexpr auto nanMomentum = std::numeric_limits::quiet_NaN(); - constexpr auto batchSize = 2 * simd::size; constexpr auto totalUTLayers = static_cast( UTInfo::DetectorNumbers::TotalLayers ); - - struct ProtoTracks final { - - std::array wbs; - std::array xMidFields; - std::array invKinkVeloDists; - + // -- Used for the calculation of the size of the search windows + constexpr std::array normFact{0.95f, 1.0f, 1.36f, 1.41f}; + using LooseBounds = std::array; + using NominalBounds = std::array; + + struct ProtoTrackTag { + struct InitialWeight : Event::float_field {}; + struct XMidField : Event::float_field {}; + struct InvKinkVeloDist : Event::float_field {}; // -- this is for the hits // -- this does _not_ include overlap hits, so only 4 per track - std::array xs; - std::array zs; - std::array weightss{}; // this needs to be zero-initialized - std::array sins; - std::array ids; - std::array hitIndexs; - - // -- this is the output of the fit - std::array qps; - std::array chi2TTs; - std::array xTTs; - std::array xSlopeTTs; - std::array ys; - + struct X : Event::floats_field<4> {}; + struct Z : Event::floats_field<4> {}; + struct Weight : Event::floats_field<4> {}; + struct Sin : Event::floats_field<4> {}; + struct ID : Event::ints_field<4> {}; + struct HitIndex : Event::ints_field<4> {}; + // fit output + struct QOverP : Event::float_field {}; + struct Chi2 : Event::float_field {}; + struct XOffset : Event::float_field {}; + struct XSlope : Event::float_field {}; + struct YSlope : Event::float_field {}; // -- and this the original state (in the Velo) - std::array statePoss; - std::array stateDirs; - std::array ancestorIndexs; - + struct VeloState : Event::pos_dir_field {}; // -- and this an index to find the hit containers - std::array hitContIndexs; - - std::size_t size{0}; - SOA_ACCESSOR_VAR( x, &( xs[pos * batchSize] ), int pos ) - SOA_ACCESSOR_VAR( z, &( zs[pos * batchSize] ), int pos ) - SOA_ACCESSOR_VAR( weight, &( weightss[pos * batchSize] ), int pos ) - SOA_ACCESSOR_VAR( sin, &( sins[pos * batchSize] ), int pos ) - SOA_ACCESSOR_VAR( id, &( ids[pos * batchSize] ), int pos ) - SOA_ACCESSOR_VAR( hitIndex, &( hitIndexs[pos * batchSize] ), int pos ) - - SOA_ACCESSOR( qp, qps.data() ) - SOA_ACCESSOR( chi2TT, chi2TTs.data() ) - SOA_ACCESSOR( xTT, xTTs.data() ) - SOA_ACCESSOR( xSlopeTT, xSlopeTTs.data() ) - SOA_ACCESSOR( y, ys.data() ) - - SOA_ACCESSOR( ancestorIndex, ancestorIndexs.data() ) - SOA_ACCESSOR( hitContIndex, hitContIndexs.data() ) - VEC3_SOA_ACCESSOR( pos, (float*)&( statePoss[0] ), (float*)&( statePoss[batchSize] ), - (float*)&( statePoss[2 * batchSize] ) ) - VEC3_XY_SOA_ACCESSOR( dir, (float*)&( stateDirs[0] ), (float*)&( stateDirs[batchSize] ), 1.0f ) - - SOA_ACCESSOR( wb, wbs.data() ) - SOA_ACCESSOR( xMidField, xMidFields.data() ) - SOA_ACCESSOR( invKinkVeloDist, invKinkVeloDists.data() ) - - void initTracks( int indexVal, float maxPseudoChi2 ) { - hitIndexs.fill( indexVal ); - chi2TTs.fill( maxPseudoChi2 ); - size = 0; - } - - template - void fillHelperParams( Vec3 pos, Vec3 dir, const float zKink, - const float sigmaVeloSlope ) { + struct Ancestor : Event::int_field {}; + struct HitIndexContainer : Event::int_field {}; - using F = typename dType::float_v; - - F( pos.x + dir.x * ( zKink - pos.z ) ).store( xMidFields.data() ); - F a = sigmaVeloSlope * ( zKink - pos.z ); - F( 1.0f / ( a * a ) ).store( wbs.data() ); - F( 1.0f / ( zKink - pos.z ) ).store( invKinkVeloDists.data() ); - } + template + using prototrack_t = + Event::SOACollection; + }; - template - void storeSimpleFitInfo( typename dType::float_v chi2TT, typename dType::float_v qp, typename dType::float_v xTT, - typename dType::float_v xSlopeTT, unsigned int trackIndex ) { + struct ProtoTracks : ProtoTrackTag::prototrack_t { + using base_t = typename ProtoTrackTag::prototrack_t; + using base_t::allocator_type; + using base_t::base_t; - using F = typename dType::float_v; + template + struct ProtoTrackProxy : Event::Proxy { + using base_t = typename Event::Proxy; + using base_t::base_t; + auto isInvalid() const { return this->template field( 0 ).get() == -1; } + }; - F( chi2TT ).store( &chi2TTs[trackIndex] ); - F( qp ).store( &qps[trackIndex] ); - F( xTT ).store( &xTTs[trackIndex] ); - F( xSlopeTT ).store( &xSlopeTTs[trackIndex] ); - } + template + using proxy_type = ProtoTrackProxy; + }; - // -- this runs over all 4 layers, even if no hit was found - // -- but it fills a weight of 0 - // -- Note: These are not "physical" layers, as the hits are ordered such that only - // -- the last one can be not filled. - template - void fillHitInfo( const LHCb::Event::Zip& hitsInL, - unsigned int trackIndex ) { - - using F = typename dType::float_v; - using I = typename dType::int_v; - auto nValid{0}; - if ( hitsInL.size() != 0 ) { - for ( int i = 0; i < totalUTLayers; ++i ) { - int hitI = hitIndexs[trackIndex + i * batchSize]; - const auto valid = ( hitI != -1 ); - const auto weight = valid ? hitsInL[hitI].weight().cast() : 0.0f; - nValid += valid; - hitI = std::max( 0, hitI ); - LHCb::LHCbID id( LHCb::Detector::UT::ChannelID( hitsInL[hitI].channelID().cast() ) ); - - F( weight ).store( &weightss[trackIndex + i * batchSize] ); - F( hitsInL[hitI].x() ).store( &xs[trackIndex + i * batchSize] ); - F( hitsInL[hitI].z() ).store( &zs[trackIndex + i * batchSize] ); - F( hitsInL[hitI].sin() ).store( &sins[trackIndex + i * batchSize] ); - I( hitsInL[hitI].index() ).store( &hitIndexs[trackIndex + i * batchSize] ); - I( id.lhcbID() ).store( &ids[trackIndex + i * batchSize] ); - } - } - // this is useful in filterMode - if ( !nValid ) { F( nanMomentum ).store( &qps[trackIndex] ); } - } + struct VeloState { + float x, y, z, tx, ty; }; struct VeloUTGeomCache { @@ -215,49 +155,48 @@ namespace LHCb::Pr::VeloUT { } // perform a fit using trackhelper's best hits with y correction, improve qop estimate - simd::float_v fastfitterSIMD( std::array& improvedParams, const ProtoTracks& protoTracks, - const float zMidUT, const simd::float_v qpxz2p, const int t, - simd::mask_v& goodFitMask ) { + template + simd::float_v fastfitterSIMD( std::array& improvedParams, Proxy pTrack, const float zMidUT, + const simd::float_v qpxz2p, simd::mask_v& goodFitMask ) { // -- parameters that describe the z position of the kink point as a function of ty in a 4th order polynomial // (even // terms only) - constexpr auto magFieldParams = std::array{2010.0f, -2240.0f, -71330.f}; - const Vec3 pos = protoTracks.pos( t ); - const Vec3 dir = protoTracks.dir( t ); - - const simd::float_v x = pos.x; - const simd::float_v y = pos.y; - const simd::float_v z = pos.z; - const simd::float_v tx = dir.x; - const simd::float_v ty = dir.y; - const simd::float_v zKink = - magFieldParams[0] - ty * ty * magFieldParams[1] - ty * ty * ty * ty * magFieldParams[2]; - const simd::float_v xMidField = x + tx * ( zKink - z ); - - const simd::float_v zDiff = 0.001f * ( zKink - zMidUT ); + constexpr auto magFieldParams = std::array{2010.0f, -2240.0f, -71330.f}; + const auto ty = pTrack.template field().ty(); + const auto tx = pTrack.template field().tx(); + const auto z = pTrack.template field().z(); + const auto y = pTrack.template field().y(); + const auto x = pTrack.template field().x(); + const auto ty2 = ty * ty; + const auto zKink = magFieldParams[0] - ty2 * magFieldParams[1] - ty2 * ty2 * magFieldParams[2]; + const auto xMidField = x + tx * ( zKink - z ); + const auto zDiff = 0.001f * ( zKink - zMidUT ); // -- This is to avoid division by zero... - const simd::float_v pHelper = max( abs( protoTracks.qp( t ) * qpxz2p ), 1e-9f ); - const simd::float_v invP = pHelper * 1.f / sqrt( 1.0f + ty * ty ); + const auto qop = pTrack.template field().get(); + const auto pHelper = max( abs( qop * qpxz2p ), 1e-9f ); + const auto invP = pHelper * 1.f / sqrt( 1.0f + ty2 ); // these resolution are semi-empirical, could be tuned and might not be correct for low momentum. - const simd::float_v error1 = - 0.14f + 10000.0f * invP; // this is the resolution due to multiple scattering between Velo and UT - const simd::float_v error2 = 0.12f + 3000.0f * invP; // this is the resolution due to the finite Velo resolution - const simd::float_v error = error1 * error1 + error2 * error2; - const simd::float_v weight = 1.0f / error; + // this is the resolution due to multiple scattering between Velo and UT + const auto error1 = 0.14f + 10000.0f * invP; + // this is the resolution due to the finite Velo resolution + const auto error2 = 0.12f + 3000.0f * invP; + + const auto error = error1 * error1 + error2 * error2; + const auto weight = 1.0f / error; std::array mat = {weight, weight * zDiff, weight * zDiff * zDiff, 0.0f, 0.0f, 0.0f}; std::array rhs = {weight * xMidField, weight * xMidField * zDiff, 0.0f}; - for ( int i = 0; i < 4; ++i ) { + for ( int i = 0; i < totalUTLayers; ++i ) { // -- there are 3-hit candidates, but we'll // -- just treat them like 4-hit candidates // -- with 0 weight for the last hit - const simd::float_v ui = protoTracks.x( t, i ); - const simd::float_v dz = 0.001f * ( protoTracks.z( t, i ) - zMidUT ); - const simd::float_v w = protoTracks.weight( t, i ); - const simd::float_v ta = protoTracks.sin( t, i ); + const auto ui = pTrack.template field( i ).get(); + const auto dz = 0.001f * ( pTrack.template field( i ).get() - zMidUT ); + const auto w = pTrack.template field( i ).get(); + const auto ta = pTrack.template field( i ).get(); mat[0] += w; mat[1] += w * dz; mat[2] += w * dz * dz; @@ -268,46 +207,43 @@ namespace LHCb::Pr::VeloUT { rhs[1] += w * ui * dz; rhs[2] += w * ui * ta; } - + // TODO: this is overkill for dimension 3, vanilla matrix inversion should be fine goodFitMask = !CholeskyDecomposition3( mat, rhs ); - const simd::float_v xUTFit = rhs[0]; - const simd::float_v xSlopeUTFit = 0.001f * rhs[1]; - const simd::float_v offsetY = rhs[2]; + const auto xUTFit = rhs[0]; + const auto xSlopeUTFit = 0.001f * rhs[1]; + const auto offsetY = rhs[2]; - const simd::float_v distX = ( xMidField - xUTFit - xSlopeUTFit * ( zKink - zMidUT ) ); + const auto distX = xMidField - xUTFit - xSlopeUTFit * ( zKink - zMidUT ); // -- This takes into account that the distance between a point and track is smaller than the distance on the // x-axis - const simd::float_v distCorrectionX2 = 1.0f / ( 1 + xSlopeUTFit * xSlopeUTFit ); - simd::float_v chi2 = weight * ( distX * distX * distCorrectionX2 + offsetY * offsetY / ( 1.0f + ty * ty ) ); - - for ( int i = 0; i < 4; ++i ) { - - const simd::float_v dz = protoTracks.z( t, i ) - zMidUT; - const simd::float_v w = protoTracks.weight( t, i ); - const simd::float_v dist = ( protoTracks.x( t, i ) - xUTFit - xSlopeUTFit * dz - - offsetY * protoTracks.sin( t, i ) ); - + const auto distCorrectionX2 = 1.0f / ( 1 + xSlopeUTFit * xSlopeUTFit ); + auto chi2 = weight * ( distX * distX * distCorrectionX2 + offsetY * offsetY / ( 1.0f + ty2 ) ); + for ( int i = 0; i < totalUTLayers; ++i ) { + const auto dz = pTrack.template field( i ).get() - zMidUT; + const auto w = pTrack.template field( i ).get(); + const auto x = pTrack.template field( i ).get(); + const auto sin = pTrack.template field( i ).get(); + const auto dist = x - xUTFit - xSlopeUTFit * dz - offsetY * sin; chi2 += w * dist * dist * distCorrectionX2; } // new VELO slope x - const simd::float_v xb = - 0.5f * ( ( xUTFit + xSlopeUTFit * ( zKink - zMidUT ) ) + xMidField ); // the 0.5 is empirical - const simd::float_v xSlopeVeloFit = ( xb - x ) / ( zKink - z ); + const auto xb = 0.5f * ( ( xUTFit + xSlopeUTFit * ( zKink - zMidUT ) ) + xMidField ); // the 0.5 is empirical + const auto xSlopeVeloFit = ( xb - x ) / ( zKink - z ); improvedParams = {xUTFit, xSlopeUTFit, y + ty * ( zMidUT - z ) + offsetY, chi2}; // calculate q/p - const simd::float_v sinInX = xSlopeVeloFit / sqrt( 1.0f + xSlopeVeloFit * xSlopeVeloFit + ty * ty ); - const simd::float_v sinOutX = xSlopeUTFit / sqrt( 1.0f + xSlopeUTFit * xSlopeUTFit + ty * ty ); + const auto sinInX = xSlopeVeloFit / sqrt( 1.0f + xSlopeVeloFit * xSlopeVeloFit + ty2 ); + const auto sinOutX = xSlopeUTFit / sqrt( 1.0f + xSlopeUTFit * xSlopeUTFit + ty2 ); return ( sinInX - sinOutX ); } // -- Evaluate the linear discriminant // -- Coefficients derived with LD method for p, pT and chi2 with TMVA template - simd::float_v evaluateLinearDiscriminantSIMD( const std::array& inputValues ) { + simd::float_v evaluateLinearDiscriminantSIMD( std::array inputValues ) { constexpr auto coeffs = ( nHits == 3 ? std::array{0.162880166064f, -0.107081172665f, 0.134153123662f, -0.137764853657f} @@ -343,7 +279,7 @@ namespace LHCb::Pr::VeloUT { } // -------------------------------------------------------------------- - // -- These things are all hardcopied from the PrTableForFunction + // -- TODO: These things are all hardcopied from the PrTableForFunction // -- and PrUTMagnetTool // -- If the granularity or whatever changes, this will give wrong results simd::int_v masterIndexSIMD( const simd::int_v index1, const simd::int_v index2, const simd::int_v index3 ) { @@ -354,8 +290,7 @@ namespace LHCb::Pr::VeloUT { // -- 2 helper functions for fit // -- Pseudo chi2 fit, templated for 3 or 4 hits // =========================================================================================== - inline __attribute__( ( always_inline ) ) void - addHit( span mat, span rhs, const LHCb::Pr::UT::Mut::Hits& hits, int index, float zMidUT ) { + void addHit( span mat, span rhs, const UT::Mut::Hits& hits, int index, float zMidUT ) { const auto& hit = hits.scalar()[index]; const float ui = hit.x().cast(); const float ci = hit.cos().cast(); @@ -368,28 +303,27 @@ namespace LHCb::Pr::VeloUT { rhs[1] += wi * ui * dz; } - template - inline __attribute__( ( always_inline ) ) void - simpleFit( const std::array& indices, const LHCb::Pr::UT::Mut::Hits& hits, ProtoTracks& pTracks, - const int trackIndex, float zMidUT, float zKink, float invSigmaVeloSlope ) { + template + [[gnu::always_inline]] inline void simpleFit( const std::array& indices, const UT::Mut::Hits& hits, + Proxy pTrack, float zMidUT, float zKink, float invSigmaVeloSlope ) { static_assert( N == 3 || N == 4 ); // -- Scale the z-component, to not run into numerical problems // -- with floats - const float wb = pTracks.wb( 0 ).cast(); - const float xMidField = pTracks.xMidField( 0 ).cast(); - const float invKinkVeloDist = pTracks.invKinkVeloDist( 0 ).cast(); - const float stateX = pTracks.pos( trackIndex ).x.cast(); - const float stateTx = pTracks.dir( trackIndex ).x.cast(); + const auto wb = pTrack.template field().get().cast(); + const auto xMidField = pTrack.template field().get().cast(); + const auto invKinkVeloDist = pTrack.template field().get().cast(); + const auto stateX = pTrack.template field().x().cast(); + const auto stateTx = pTrack.template field().tx().cast(); const float zDiff = 0.001f * ( zKink - zMidUT ); auto mat = std::array{wb, wb * zDiff, wb * zDiff * zDiff}; auto rhs = std::array{wb * xMidField, wb * xMidField * zDiff}; auto const muthit = hits.scalar(); - std::for_each( indices.begin(), indices.end(), - [&]( const auto index ) { addHit( mat, rhs, hits, index, zMidUT ); } ); + for ( auto index : indices ) { addHit( mat, rhs, hits, index, zMidUT ); } + // TODO: this is overkill for dimension 2, vanilla matrix inversion should be fine ROOT::Math::CholeskyDecomp decomp( mat.data() ); if ( !decomp ) return; @@ -412,16 +346,18 @@ namespace LHCb::Pr::VeloUT { } ) / ( N + 1 - 2 ); - if ( chi2TT < pTracks.chi2TT( trackIndex ).cast() ) { + if ( chi2TT < pTrack.template field().get().cast() ) { // calculate q/p const float sinInX = xSlopeVeloFit * vdt::fast_isqrtf( 1.0f + xSlopeVeloFit * xSlopeVeloFit ); const float sinOutX = xSlopeTTFit * vdt::fast_isqrtf( 1.0f + xSlopeTTFit * xSlopeTTFit ); - const float qp = ( sinInX - sinOutX ); - - pTracks.storeSimpleFitInfo( chi2TT, qp, xTTFit, xSlopeTTFit, trackIndex ); - for ( std::size_t i = 0; i < N; i++ ) { pTracks.store_hitIndex( trackIndex, i, indices[i] ); } - if constexpr ( N == 3 ) { pTracks.store_hitIndex( trackIndex, 3, -1 ); } + const float qp = sinInX - sinOutX; + pTrack.template field().set( chi2TT ); + pTrack.template field().set( qp ); + pTrack.template field().set( xTTFit ); + pTrack.template field().set( xSlopeTTFit ); + for ( std::size_t i = 0; i < N; i++ ) { pTrack.template field( i ).set( indices[i] ); } + if constexpr ( N == 3 ) { pTrack.template field( N ).set( -1 ); } } } } // namespace @@ -449,8 +385,8 @@ namespace LHCb::Pr::VeloUT { } ); } - Upstream::Tracks operator()( const EventContext&, const Velo::Tracks&, const LHCb::Pr::UT::Hits&, - const VeloUTGeomCache&, const DeMagnet& ) const override final; + Upstream::Tracks operator()( const EventContext&, const Velo::Tracks&, const UT::Hits&, const VeloUTGeomCache&, + const DeMagnet& ) const override final; private: Gaudi::Property m_minMomentum{this, "MinMomentum", 1500.f * Gaudi::Units::MeV}; @@ -477,6 +413,7 @@ namespace LHCb::Pr::VeloUT { Gaudi::Property m_finalFit{this, "FinalFit", true}; Gaudi::Property m_fiducialCuts{this, "FiducialCuts", true}; Gaudi::Property m_filterMode{this, "FilterMode", false}; + Gaudi::Property m_looseSectorSearch{this, "LooseSectorSearch", false}; mutable Gaudi::Accumulators::SummingCounter m_seedsCounter{this, "#seeds"}; mutable Gaudi::Accumulators::SummingCounter m_tracksCounter{this, "#tracks"}; @@ -484,34 +421,25 @@ namespace LHCb::Pr::VeloUT { LHCb::UT::TrackUtils::MiniStates getStates( const Velo::Tracks& inputTracks, Upstream::Tracks& outputTracks, float zMidUT ) const; - bool getHitsScalar( const LHCb::Pr::UT::Hits& hh, const LHCb::UT::TrackUtils::MiniStates& filteredStates, - const std::array& compBoundsArray, - LHCb::Pr::UT::Mut::Hits& hitsInLayers, const std::size_t t, float zMidUT, - int minLayers = totalUTLayers - 1 ) const; + template + bool getHitsScalar( VeloState, int, float, int, const UT::Hits&, span, + UT::Mut::Hits& ) const; - inline void findHits( const LHCb::Pr::UT::Hits& hh, const simd::float_v& yProto, const simd::float_v& ty, - const simd::float_v& tx, const simd::float_v& xOnTrackProto, const simd::float_v& tolProto, - const simd::float_v& xTolNormFact, LHCb::Pr::UT::Mut::Hits& mutHits, - const simd::float_v& yTol, const int firstIndex, const int lastIndex ) const; + inline void findHits( const UT::Hits& hh, simd::float_v yProto, simd::float_v ty, simd::float_v tx, + simd::float_v xOnTrackProto, simd::float_v tolProto, simd::float_v xTolNormFact, + UT::Mut::Hits& mutHits, simd::float_v yTol, int firstIndex, int lastIndex ) const; - template - bool formClusters( const LHCb::Pr::UT::Mut::Hits& hitsInLayers, ProtoTracks& pTracks, const int trackIndex, - float zMidUT ) const; + template + bool formClusters( const UT::Mut::Hits& hitsInLayers, Proxy pTracks, float zMidUT ) const; - template - void prepareOutputTrackSIMD( const ProtoTracks& protoTracks, - const std::array& hitsInLayers, + template + void prepareOutputTrackSIMD( const ProtoTracks& protoTracks, span hitsInLayers, Upstream::Tracks& outputTracks, const Velo::Tracks& inputTracks, const BdlTable& bdlTable, const DeMagnet& magnet, float zMidUT ) const; /// Multipurpose tool for Bdl and deflection ToolHandle m_PrUTMagnetTool{this, "PrUTMagnetTool", "PrUTMagnetTool"}; - mutable Gaudi::Accumulators::MsgCounter m_too_much_in_filtered{ - this, "Reached the maximum number of tracks in filteredStates!!"}; - mutable Gaudi::Accumulators::MsgCounter m_too_much_in_boundaries{ - this, "Reached the maximum number of tracks in Boundaries!!"}; - constexpr static float c_zKink{1780.0}; constexpr static float c_sigmaVeloSlope{0.10 * Gaudi::Units::mrad}; constexpr static float c_invSigmaVeloSlope{10.0 / Gaudi::Units::mrad}; @@ -521,7 +449,7 @@ namespace LHCb::Pr::VeloUT { DECLARE_COMPONENT_WITH_ID( PrVeloUT, "PrVeloUT" ) Upstream::Tracks PrVeloUT::operator()( const EventContext& evtCtx, const Velo::Tracks& inputTracks, - const LHCb::Pr::UT::Hits& hh, const VeloUTGeomCache& velogeom, + const UT::Hits& hh, const VeloUTGeomCache& velogeom, const DeMagnet& magnet ) const { Upstream::Tracks outputTracks{&inputTracks, Zipping::generateZipIdentifier(), LHCb::getMemResource( evtCtx )}; @@ -533,117 +461,119 @@ namespace LHCb::Pr::VeloUT { LHCb::UT::TrackUtils::MiniStates filteredStates = getStates( inputTracks, outputTracks, velogeom.zMidUT ); - simd::float_v invMinPt = 1.0f / m_minPT.value(); - simd::float_v invMinMomentum = 1.0f / m_minMomentum.value(); - - // -- Used for the calculation of the size of the search windows - constexpr const std::array normFact{0.95f, 1.0f, 1.36f, 1.41f}; + const auto invMinPt = 1.0f / m_minPT; + const auto invMinMomentum = 1.0f / m_minMomentum; - auto extrapFunc = [&]( int layerIndex, simd::float_v x, simd::float_v y, simd::float_v z, simd::float_v tx, - simd::float_v ty, simd::float_v ) { + auto extrapolateToLayer = [this, invMinPt = invMinPt, invMinMomentum = invMinMomentum, &geometry, + &velogeom]( auto layerIndex, auto x, auto y, auto z, auto tx, auto ty, auto /*qop*/ ) { // -- this 0.002 seems a little odd... - const simd::float_v theta = max( 0.002f, sqrt( tx * tx + ty * ty ) ); - const simd::float_v invMinMom = min( invMinPt * theta, invMinMomentum ); + const auto theta = max( 0.002f, sqrt( tx * tx + ty * ty ) ); + const auto invMinMom = min( invMinPt * theta, invMinMomentum ); - const simd::float_v xTol = + const auto xTol = abs( velogeom.distToMomentum * invMinMom * normFact[layerIndex] ) - abs( tx ) * m_intraLayerDist.value(); - const simd::float_v yTol = m_yTol.value() + m_yTolSlope.value() * xTol; + const auto yTol = m_yTol.value() + m_yTolSlope.value() * xTol; - const simd::float_v zGeo{geometry.layers[layerIndex].z}; - const simd::float_v dxDy{geometry.layers[layerIndex].dxDy}; + const auto zGeo{geometry.layers[layerIndex].z}; + const auto dxDy{geometry.layers[layerIndex].dxDy}; - const simd::float_v yAtZ = y + ty * ( zGeo - z ); - const simd::float_v xLayer = x + tx * ( zGeo - z ); - const simd::float_v yLayer = yAtZ + yTol * dxDy; + const auto yAtZ = y + ty * ( zGeo - z ); + const auto xLayer = x + tx * ( zGeo - z ); + const auto yLayer = yAtZ + yTol * dxDy; return std::make_tuple( xLayer, yLayer, xTol, yTol ); }; - namespace TU = LHCb::UT::TrackUtils; - - std::variant, std::array> - compBoundsArray; - if ( m_looseSectorSearch ) { - compBoundsArray = LHCb::UTDAQ::findAllSectorsExtrap( - filteredStates, geometry, extrapFunc, m_minLayers ); - } else { + std::variant compBoundsArray; + if ( !m_looseSectorSearch ) { compBoundsArray = LHCb::UTDAQ::findAllSectorsExtrap( - filteredStates, geometry, extrapFunc, m_minLayers ); + filteredStates, geometry, extrapolateToLayer, m_minLayers ); + } else { + compBoundsArray = LHCb::UTDAQ::findAllSectorsExtrap( + filteredStates, geometry, extrapolateToLayer, m_minLayers ); } - auto hitsInLayers = LHCb::make_object_array( Zipping::generateZipIdentifier(), - LHCb::getMemResource( evtCtx ) ); - for ( auto& hits : hitsInLayers ) { hits.reserve( 32 ); } - ProtoTracks pTracks; - - // -- We cannot put all found hits in an array, as otherwise the stack overflows - // -- so we just do the whole thing in batches - const std::size_t filteredStatesSize = filteredStates.size(); - for ( std::size_t t = 0; t < filteredStatesSize; t += batchSize ) { - - // -- This is scalar, as the hits are found in a scalar way - filteredStates.clear(); - for ( std::size_t t2 = 0; t2 < batchSize && t2 + t < filteredStatesSize; ++t2 ) { - const auto fSize = filteredStates.size(); - hitsInLayers[fSize].clear(); - hitsInLayers[fSize].layerIndices.fill( -1 ); - - if ( m_looseSectorSearch ) { - const bool foundHits = getHitsScalar( - hh, filteredStates, std::get>( compBoundsArray ), hitsInLayers[fSize], - t + t2, velogeom.zMidUT, m_minLayers ); - filteredStates.copy_back( filteredStates, t + t2, foundHits ); - } else { - const bool foundHits = getHitsScalar( - hh, filteredStates, std::get>( compBoundsArray ), - hitsInLayers[fSize], t + t2, velogeom.zMidUT, m_minLayers ); - filteredStates.copy_back( filteredStates, t + t2, foundHits ); - } - } - - pTracks.initTracks( -1, m_maxPseudoChi2.value() ); - for ( const auto& fState : filteredStates.scalar() ) { - const auto tEff = fState.offset(); - - Vec3 pos{fState.get().x(), - fState.get().y(), - fState.get().z()}; - Vec3 dir{fState.get().tx(), - fState.get().ty(), 1.f}; - - const int trackIndex = pTracks.size; - pTracks.fillHelperParams( pos, dir, c_zKink, c_sigmaVeloSlope ); - pTracks.store_pos( trackIndex, pos ); - pTracks.store_dir( trackIndex, dir ); - - if ( !formClusters( hitsInLayers[tEff], pTracks, trackIndex, velogeom.zMidUT ) ) { - formClusters( hitsInLayers[tEff], pTracks, trackIndex, velogeom.zMidUT ); - } + // TODO: these hit collections can be fully integrated into the ProtoTracks collection because currently + // we have exactly one of them per ProtoTrack. This can be done by replacing the static floats_field<4> by + // its vector variant and then storing all hits there (reducing them later to only 3 or 4). A good reason to + // do it is performance because the allocations below are relatively expensive and wasteful memory-wise. + // See issue https://gitlab.cern.ch/lhcb/Rec/-/issues/567 + std::vector hitsInLayers{}; + hitsInLayers.reserve( filteredStates.size() ); + for ( std::size_t i = 0; i < filteredStates.size(); ++i ) { + hitsInLayers.emplace_back( Zipping::generateZipIdentifier(), LHCb::getMemResource( evtCtx ) ).reserve( 32 ); + } - if ( !m_filterMode && pTracks.hitIndex( trackIndex, 0 ).cast() == -1 ) continue; - - scalar::int_v ancestorIndex = fState.get(); - pTracks.store_ancestorIndex( trackIndex, ancestorIndex ); - pTracks.store_hitContIndex( trackIndex, tEff ); - // -- this runs over all 4 layers, even if no hit was found - // -- but it fills a weight of 0 - // -- Note: These are not "physical" layers, as the hits are ordered such that only - // -- the last one can be not filled. - auto hitsInL = hitsInLayers[tEff].scalar(); - pTracks.fillHitInfo( hitsInL, trackIndex ); - pTracks.size++; + auto pTracks = ProtoTracks{Zipping::generateZipIdentifier(), LHCb::getMemResource( evtCtx )}; + pTracks.reserve( inputTracks.size() ); + + for ( auto fState : filteredStates.scalar() ) { + const auto x = fState.get().x().cast(); + const auto y = fState.get().y().cast(); + const auto z = fState.get().z().cast(); + const auto tx = fState.get().tx().cast(); + const auto ty = fState.get().ty().cast(); + const auto hitContainerIdx = pTracks.size(); + hitsInLayers[hitContainerIdx].clear(); + hitsInLayers[hitContainerIdx].layerIndices.fill( -1 ); + if ( !m_looseSectorSearch ? getHitsScalar( + {x, y, z, tx, ty}, fState.offset(), velogeom.zMidUT, m_minLayers, hh, + std::get( compBoundsArray ), hitsInLayers[hitContainerIdx] ) + : getHitsScalar( + {x, y, z, tx, ty}, fState.offset(), velogeom.zMidUT, m_minLayers, hh, + std::get( compBoundsArray ), hitsInLayers[hitContainerIdx] ) ) { + const auto ancestorIndex = fState.get(); + const auto zDiff = c_zKink - z; + const auto a = c_sigmaVeloSlope * zDiff; + auto pTrack = pTracks.emplace_back(); + pTrack.field().set( x + tx * zDiff ); + pTrack.field().set( 1.f / ( a * a ) ); + pTrack.field().set( 1.f / zDiff ); + pTrack.field().setPosition( x, y, z ); + pTrack.field().setDirection( tx, ty ); + pTrack.field().set( ancestorIndex ); + pTrack.field().set( m_maxPseudoChi2 ); + pTrack.field().set( hitContainerIdx ); + for ( auto i{0}; i < totalUTLayers; ++i ) { pTrack.field( i ).set( -1 ); } } + } - // padding to avoid FPEs - if ( ( pTracks.size + simd::size ) < batchSize ) { - pTracks.store_pos( pTracks.size, Vec3( 1.f, 1.f, 1.f ) ); - pTracks.store_dir( pTracks.size, Vec3( 1.f, 1.f, 1.f ) ); + for ( auto pTrack : pTracks.scalar() ) { + const auto hitsOffset = pTrack.offset(); + if ( !formClusters( hitsInLayers[hitsOffset], pTrack, velogeom.zMidUT ) ) { + formClusters( hitsInLayers[hitsOffset], pTrack, velogeom.zMidUT ); } - prepareOutputTrackSIMD( pTracks, hitsInLayers, outputTracks, inputTracks, bdlTable, magnet, velogeom.zMidUT ); + if ( !m_filterMode && pTrack.isInvalid() ) continue; + // -- this runs over all 4 layers, even if no hit was found + // -- but it fills a weight of 0 + // -- Note: These are not "physical" layers, as the hits are ordered such that only + // -- the last one can be not filled. + auto hitsInL = hitsInLayers[hitsOffset].scalar(); + auto nValid{0}; + if ( hitsInL.size() != 0 ) { + for ( auto i = 0; i < totalUTLayers; ++i ) { + auto hitI = pTrack.field( i ).get().cast(); + const auto valid = ( hitI != -1 ); + const auto weight = valid ? hitsInL[hitI].weight().cast() : 0.0f; + nValid += valid; + hitI = std::max( 0, hitI ); + const auto id = LHCbID{LHCb::Detector::UT::ChannelID( hitsInL[hitI].channelID().cast() )}; + pTrack.field( i ).set( weight ); + pTrack.field( i ).set( hitsInL[hitI].x() ); + pTrack.field( i ).set( hitsInL[hitI].z() ); + pTrack.field( i ).set( hitsInL[hitI].sin() ); + pTrack.field( i ).set( hitsInL[hitI].index() ); + pTrack.field( i ).set( id.lhcbID() ); + } + } + // this is useful in filterMode + if ( !nValid ) { pTrack.field().set( nanMomentum ); } } + prepareOutputTrackSIMD( pTracks, hitsInLayers, outputTracks, inputTracks, bdlTable, magnet, velogeom.zMidUT ); + // -- The algorithm should not store duplicated hits... assert( findDuplicates( outputTracks ) && "Hit duplicates found" ); @@ -654,8 +584,8 @@ namespace LHCb::Pr::VeloUT { //============================================================================= // Get the state, do some cuts //============================================================================= - __attribute__( ( flatten ) ) LHCb::UT::TrackUtils::MiniStates - PrVeloUT::getStates( const Velo::Tracks& inputTracks, Upstream::Tracks& outputTracks, float zMidUT ) const { + LHCb::UT::TrackUtils::MiniStates PrVeloUT::getStates( const Velo::Tracks& inputTracks, Upstream::Tracks& outputTracks, + float zMidUT ) const { LHCb::UT::TrackUtils::MiniStates filteredStates{Zipping::generateZipIdentifier(), {inputTracks.get_allocator().resource()}}; @@ -666,8 +596,8 @@ namespace LHCb::Pr::VeloUT { for ( auto const& velotrack : inputTracks.simd() ) { auto const loopMask = velotrack.loop_mask(); auto const trackVP = velotrack.indices(); - auto pos = velotrack.StatePos( Event::Enum::State::Location::EndVelo ); - auto dir = velotrack.StateDir( Event::Enum::State::Location::EndVelo ); + auto pos = velotrack.StatePos( EndVelo ); + auto dir = velotrack.StateDir( EndVelo ); simd::float_v xMidUT = pos.x() + dir.x() * ( zMidUT - pos.z() ); simd::float_v yMidUT = pos.y() + dir.y() * ( zMidUT - pos.z() ); @@ -710,25 +640,17 @@ namespace LHCb::Pr::VeloUT { // Find the hits //============================================================================= template - inline __attribute__( ( always_inline ) ) bool - PrVeloUT::getHitsScalar( const LHCb::Pr::UT::Hits& hh, const LHCb::UT::TrackUtils::MiniStates& filteredStates, - const std::array& compBoundsArray, - LHCb::Pr::UT::Mut::Hits& hitsInLayers, const std::size_t t, float zMidUT, - int minLayers ) const { - - // -- This is for some sanity checks later - [[maybe_unused]] const int maxNumSectors = m_looseSectorSearch - ? LHCb::UT::TrackUtils::maxNumSectorsBoundariesLoose - : LHCb::UT::TrackUtils::maxNumSectorsBoundariesNominal; + [[gnu::flatten]] bool + PrVeloUT::getHitsScalar( VeloState veloState, int stateOffset, float zMidUT, int minLayers, const UT::Hits& hh, + span compBoundsArray, UT::Mut::Hits& hitsInLayers ) const { const simd::float_v tolProto{m_yTol.value()}; - const auto fState = filteredStates.scalar(); - const auto xState = fState[t].get().x().cast(); - const auto yState = fState[t].get().y().cast(); - const auto zState = fState[t].get().z().cast(); - const auto txState = fState[t].get().tx().cast(); - const auto tyState = fState[t].get().ty().cast(); + const auto xState = veloState.x; + const auto yState = veloState.y; + const auto zState = veloState.z; + const auto txState = veloState.tx; + const auto tyState = veloState.ty; // in filter mode tracks close to the hole in the centre of the UT may have no hits if ( m_filterMode ) { @@ -746,24 +668,26 @@ namespace LHCb::Pr::VeloUT { const simd::float_v xOnTrackProto{xState - txState * zState}; const simd::float_v ty{tyState}; const simd::float_v tx{txState}; - // -- the second condition is to ensure at least 3 layers with hits for ( int layerIndex = 0; layerIndex < totalUTLayers && layerIndex - nLayers <= totalUTLayers - minLayers; ++layerIndex ) { const auto compBoundsArr = compBoundsArray[layerIndex].scalar(); - const auto xTolS = compBoundsArr[t].template get().cast(); - const auto nPos = compBoundsArr[t].template get().cast(); - const simd::float_v yTol = m_yTol.value() + m_yTolSlope.value() * xTolS; - const simd::float_v xTol = xTolS + abs( tx ) * m_intraLayerDist.value(); + const auto xTolS = compBoundsArr[stateOffset].template get().cast(); + const auto nPos = compBoundsArr[stateOffset].template get().cast(); + const simd::float_v yTol = m_yTol.value() + m_yTolSlope.value() * xTolS; + const simd::float_v xTol = xTolS + abs( tx ) * m_intraLayerDist.value(); - assert( nPos < maxNumSectors && "nPos out of bound" ); + assert( nPos < ( m_looseSectorSearch ? TU::maxNumSectorsBoundariesLoose : TU::maxNumSectorsBoundariesNominal ) && + "nPos out of bound" ); for ( int j = 0; j < nPos; j++ ) { - const int sectA = compBoundsArr[t].template get( j ).cast(); + const int sectA = compBoundsArr[stateOffset].template get( j ).cast(); const int sectB = - ( j == nPos - 1 ) ? sectA : compBoundsArr[t].template get( j + 1 ).cast(); + ( j == nPos - 1 ) + ? sectA + : compBoundsArr[stateOffset].template get( j + 1 ).cast(); assert( sectA != LHCb::UTDAQ::paddingSectorNumber && "sectA points to padding element" ); assert( sectB != LHCb::UTDAQ::paddingSectorNumber && "sectB points to padding element" ); @@ -777,9 +701,9 @@ namespace LHCb::Pr::VeloUT { // -- The idea is to merge adjacent ranges of indices, such that collecting hits is more efficient // -- let's try to make it branchless - const std::pair& temp = hh.indices( sectA ); - const std::pair& temp2 = hh.indices( sectB ); - const int firstIndex = temp.first; + const auto temp = hh.indices( sectA ); + const auto temp2 = hh.indices( sectB ); + const auto firstIndex = temp.first; // -- We put the lastIndex to the end of the next container if they join up // -- Note that this is _not_ fulfilled if the sector has elements and is a duplicate, // -- but this only happens if it is the padding element, in which case we are already at the last @@ -802,15 +726,13 @@ namespace LHCb::Pr::VeloUT { // ============================================================================== // -- Method that finds the hits in a given layer within a certain range // ============================================================================== - inline __attribute__( ( always_inline ) ) void - PrVeloUT::findHits( const LHCb::Pr::UT::Hits& hh, const simd::float_v& yProto, const simd::float_v& ty, - const simd::float_v& tx, const simd::float_v& xOnTrackProto, const simd::float_v& tolProto, - const simd::float_v& xTolNormFact, LHCb::Pr::UT::Mut::Hits& mutHits, const simd::float_v& yTol, - const int firstIndex, const int lastIndex ) const { - - const auto& myHits = hh; - const auto myHs = myHits.simd(); + [[gnu::always_inline]] inline void PrVeloUT::findHits( const UT::Hits& hh, simd::float_v yProto, simd::float_v ty, + simd::float_v tx, simd::float_v xOnTrackProto, + simd::float_v tolProto, simd::float_v xTolNormFact, + UT::Mut::Hits& mutHits, simd::float_v yTol, int firstIndex, + int lastIndex ) const { + const auto myHs = hh.simd(); for ( int i = firstIndex; i < lastIndex; i += simd::size ) { const auto mH = myHs[i]; @@ -849,10 +771,8 @@ namespace LHCb::Pr::VeloUT { //========================================================================= // Form clusters //========================================================================= - template - inline __attribute__( ( always_inline ) ) bool PrVeloUT::formClusters( const LHCb::Pr::UT::Mut::Hits& hitsInLayers, - ProtoTracks& pTracks, const int trackIndex, - float zMidUT ) const { + template + bool PrVeloUT::formClusters( const UT::Mut::Hits& hitsInLayers, Proxy pTrack, float zMidUT ) const { const int begin0 = forward ? hitsInLayers.layerIndices[0] : hitsInLayers.layerIndices[3]; const int end0 = forward ? hitsInLayers.layerIndices[1] : hitsInLayers.size(); @@ -867,8 +787,8 @@ namespace LHCb::Pr::VeloUT { const int end3 = forward ? hitsInLayers.size() : hitsInLayers.layerIndices[1]; bool fourLayerSolution = false; - const float stateTx = pTracks.dir( trackIndex ).x.cast(); - const auto& hitsInL = hitsInLayers.scalar(); + const auto stateTx = pTrack.template get().tx().cast(); + const auto hitsInL = hitsInLayers.scalar(); // -- this is scalar for the moment for ( int i0 = begin0; i0 < end0; ++i0 ) { @@ -919,25 +839,21 @@ namespace LHCb::Pr::VeloUT { } // -- All hits found if ( bestHit1Index != -1 && bestHit3Index != -1 ) { - simpleFit( std::array{i0, bestHit1Index, i2, bestHit3Index}, hitsInLayers, pTracks, trackIndex, zMidUT, - c_zKink, c_invSigmaVeloSlope ); + simpleFit( std::array{i0, bestHit1Index, i2, bestHit3Index}, hitsInLayers, pTrack, zMidUT, c_zKink, + c_invSigmaVeloSlope ); - if ( !fourLayerSolution && pTracks.hitIndex( trackIndex, 0 ).cast() != -1 ) { - fourLayerSolution = true; - } + if ( !fourLayerSolution && !pTrack.isInvalid() ) { fourLayerSolution = true; } continue; } // -- Nothing found in layer 3 if ( !fourLayerSolution && bestHit1Index != -1 ) { - simpleFit( std::array{i0, bestHit1Index, i2}, hitsInLayers, pTracks, trackIndex, zMidUT, c_zKink, - c_invSigmaVeloSlope ); + simpleFit( std::array{i0, bestHit1Index, i2}, hitsInLayers, pTrack, zMidUT, c_zKink, c_invSigmaVeloSlope ); continue; } // -- Noting found in layer 1 if ( !fourLayerSolution && bestHit3Index != -1 ) { - simpleFit( std::array{i0, bestHit3Index, i2}, hitsInLayers, pTracks, trackIndex, zMidUT, c_zKink, - c_invSigmaVeloSlope ); + simpleFit( std::array{i0, bestHit3Index, i2}, hitsInLayers, pTrack, zMidUT, c_zKink, c_invSigmaVeloSlope ); continue; } } @@ -947,99 +863,81 @@ namespace LHCb::Pr::VeloUT { //========================================================================= // Create the Velo-UT tracks //========================================================================= - template - inline __attribute__( ( always_inline ) ) __attribute__( ( flatten ) ) void - PrVeloUT::prepareOutputTrackSIMD( const ProtoTracks& protoTracks, - const std::array& hitsInLayers, - Upstream::Tracks& outputTracks, const Velo::Tracks& inputTracks, - const BdlTable& bdlTable, const DeMagnet& magnet, float zMidUT ) const { - - auto const velozipped = inputTracks.simd(); - const auto pSize = protoTracks.size; - for ( std::size_t t = 0; t < pSize; t += simd::size ) { - //== Handle states. copy Velo one, add TT. - const simd::float_v zOrigin = - select( protoTracks.dir( t ).y > 0.001f, - protoTracks.pos( t ).z - - protoTracks.pos( t ).y / protoTracks.dir( t ).y, - protoTracks.pos( t ).z - - protoTracks.pos( t ).x / protoTracks.dir( t ).x ); + template + void PrVeloUT::prepareOutputTrackSIMD( const ProtoTracks& protoTracks, span hitsInLayers, + Upstream::Tracks& outputTracks, const Velo::Tracks& inputTracks, + const BdlTable& bdlTable, const DeMagnet& magnet, float zMidUT ) const { - // -- this is to filter tracks where the fit had a too large chi2 - simd::mask_v fourHitTrack = protoTracks.weight( t, 3 ) > 0.0001f; + const auto velozipped = inputTracks.simd(); + for ( auto [ipTrack, pTrack] : range::enumerate( protoTracks.simd() ) ) { + //== Handle states. copy Velo one, add TT. + const auto ty = pTrack.template field().ty(); + const auto tx = pTrack.template field().tx(); + const auto z = pTrack.template field().z(); + const auto y = pTrack.template field().y(); + const auto x = pTrack.template field().x(); + const auto zOrigin = select( ty > 0.001f, z - y / ty, z - x / tx ); // -- These are calculations, copied and simplified from PrTableForFunction // -- FIXME: these rely on the internal details of PrTableForFunction!!! // and should at least be put back in there, and used from here // to make sure everything _stays_ consistent... - auto var = std::array{protoTracks.dir( t ).y, zOrigin, protoTracks.pos( t ).z}; - - simd::int_v index1 = min( max( simd::int_v{( var[0] + 0.3f ) / 0.6f * 30}, 0 ), 30 ); - simd::int_v index2 = min( max( simd::int_v{( var[1] + 250 ) / 500 * 10}, 0 ), 10 ); - simd::int_v index3 = min( max( simd::int_v{var[2] / 800 * 10}, 0 ), 10 ); - - simd::float_v bdl = gather( bdlTable.table().data(), masterIndexSIMD( index1, index2, index3 ) ); + auto var = std::array{ty, zOrigin, z}; + auto index1 = min( max( simd::int_v{( var[0] + 0.3f ) / 0.6f * 30}, 0 ), 30 ); + auto index2 = min( max( simd::int_v{( var[1] + 250 ) / 500 * 10}, 0 ), 10 ); + auto index3 = min( max( simd::int_v{var[2] / 800 * 10}, 0 ), 10 ); + auto bdl = gather( bdlTable.table().data(), masterIndexSIMD( index1, index2, index3 ) ); // -- TODO: check if we can go outside this table... - const std::array bdls = - std::array{gather( bdlTable.table().data(), masterIndexSIMD( index1 + 1, index2, index3 ) ), - gather( bdlTable.table().data(), masterIndexSIMD( index1, index2 + 1, index3 ) ), - gather( bdlTable.table().data(), masterIndexSIMD( index1, index2, index3 + 1 ) )}; - - constexpr auto minValsBdl = std::array{-0.3f, -250.0f, 0.0f}; - constexpr auto maxValsBdl = std::array{0.3f, 250.0f, 800.0f}; - constexpr auto deltaBdl = std::array{0.02f, 50.0f, 80.0f}; - const std::array boundaries = {-0.3f + simd::float_v{index1} * deltaBdl[0], - -250.0f + simd::float_v{index2} * deltaBdl[1], - 0.0f + simd::float_v{index3} * deltaBdl[2]}; + const auto bdls = std::array{gather( bdlTable.table().data(), masterIndexSIMD( index1 + 1, index2, index3 ) ), + gather( bdlTable.table().data(), masterIndexSIMD( index1, index2 + 1, index3 ) ), + gather( bdlTable.table().data(), masterIndexSIMD( index1, index2, index3 + 1 ) )}; + + constexpr auto minValsBdl = std::array{-0.3f, -250.0f, 0.0f}; + constexpr auto maxValsBdl = std::array{0.3f, 250.0f, 800.0f}; + constexpr auto deltaBdl = std::array{0.02f, 50.0f, 80.0f}; + const auto boundaries = + std::array{-0.3f + simd::float_v{index1} * deltaBdl[0], -250.0f + simd::float_v{index2} * deltaBdl[1], + 0.0f + simd::float_v{index3} * deltaBdl[2]}; // -- This is an interpolation, to get a bit more precision simd::float_v addBdlVal{0.0f}; for ( int i = 0; i < 3; ++i ) { - // -- this should make sure that values outside the range add nothing to the sum - var[i] = select( minValsBdl[i] > var[i], boundaries[i], var[i] ); - var[i] = select( maxValsBdl[i] < var[i], boundaries[i], var[i] ); - - const simd::float_v dTab_dVar = ( bdls[i] - bdl ) / deltaBdl[i]; - const simd::float_v dVar = ( var[i] - boundaries[i] ); + var[i] = select( minValsBdl[i] > var[i], boundaries[i], var[i] ); + var[i] = select( maxValsBdl[i] < var[i], boundaries[i], var[i] ); + const auto dTab_dVar = ( bdls[i] - bdl ) / deltaBdl[i]; + const auto dVar = ( var[i] - boundaries[i] ); addBdlVal += dTab_dVar * dVar; } bdl += addBdlVal; // ---- // -- order is: x, tx, y, chi2 - std::array finalParams = { - protoTracks.xTT( t ), protoTracks.xSlopeTT( t ), - protoTracks.pos( t ).y + - protoTracks.dir( t ).y * ( zMidUT - protoTracks.pos( t ).z ), - protoTracks.chi2TT( t )}; - - const simd::float_v qpxz2p = -1.0f / bdl * 3.3356f / Gaudi::Units::GeV; - simd::mask_v fitMask = simd::mask_true(); - simd::float_v qp = m_finalFit ? fastfitterSIMD( finalParams, protoTracks, zMidUT, qpxz2p, t, fitMask ) - : protoTracks.qp( t ) / - sqrt( 1.0f + protoTracks.dir( t ).y * - protoTracks.dir( t ).y ); // is this correct? - - qp = select( fitMask, qp, protoTracks.qp( t ) ); - const simd::float_v qop = select( abs( bdl ) < 1.e-8f, simd::float_v{1000.0f}, qp * qpxz2p ); + auto finalParams = std::array{pTrack.template field().get(), + pTrack.template field().get(), y + ty * ( zMidUT - z ), + pTrack.template field().get()}; + + const auto qpxz2p = -1.0f / bdl * 3.3356f / Gaudi::Units::GeV; + auto fitMask = simd::mask_true(); + auto qp = m_finalFit + ? fastfitterSIMD( finalParams, pTrack, zMidUT, qpxz2p, fitMask ) + : pTrack.template field().get() / sqrt( 1.0f + ty * ty ); // is this correct? + + qp = select( fitMask, qp, pTrack.template field().get() ); + const auto qop = select( abs( bdl ) < 1.e-8f, simd::float_v{1000.0f}, qp * qpxz2p ); // -- Don't make tracks that have grossly too low momentum // -- Beware of the momentum resolution! - const simd::float_v p = abs( 1.0f / qop ); - const simd::float_v pt = - p * sqrt( protoTracks.dir( t ).x * protoTracks.dir( t ).x + - protoTracks.dir( t ).y * protoTracks.dir( t ).y ); - - const simd::float_v xUT = finalParams[0]; - const simd::float_v txUT = finalParams[1]; - const simd::float_v yUT = finalParams[2]; - const auto tyUT = protoTracks.dir( t ).y; + const auto p = abs( 1.0f / qop ); + const auto pt = p * sqrt( tx * tx + ty * ty ); + const auto xUT = finalParams[0]; + const auto txUT = finalParams[1]; + const auto yUT = finalParams[2]; // -- apply some fiducial cuts // -- they are optimised for high pT tracks (> 500 MeV) - simd::mask_v fiducialMask = simd::mask_false(); + auto fiducialMask = simd::mask_false(); if ( m_fiducialCuts ) { const float magSign = magnet.signedRelativeCurrent(); @@ -1050,48 +948,43 @@ namespace LHCb::Pr::VeloUT { fiducialMask = fiducialMask || ( magSign * qop < 0.0f && txUT > 0.09f + 0.0003f * pt ); fiducialMask = fiducialMask || ( magSign * qop > 0.0f && txUT < -0.09f - 0.0003f * pt ); } - // -- evaluate the linear discriminant and reject ghosts // -- the values only make sense if the final fit is performed - simd::mask_v mvaMask = simd::mask_true(); - - if ( m_finalFit ) { - - const simd::float_v fourHitDisc = evaluateLinearDiscriminantSIMD<4>( {p, pt, finalParams[3]} ); - const simd::float_v threeHitDisc = evaluateLinearDiscriminantSIMD<3>( {p, pt, finalParams[3]} ); - - simd::mask_v fourHitMask = fourHitDisc > m_LD4Hits.value(); - simd::mask_v threeHitMask = threeHitDisc > m_LD3Hits.value(); - + auto mvaMask = simd::mask_true(); + if ( const auto fourHitTrack = pTrack.template field( 3 ).get() > 0.0001f; m_finalFit ) { + const auto fourHitDisc = evaluateLinearDiscriminantSIMD<4>( {p, pt, finalParams[3]} ); + const auto threeHitDisc = evaluateLinearDiscriminantSIMD<3>( {p, pt, finalParams[3]} ); + const auto fourHitMask = fourHitDisc > m_LD4Hits.value(); + const auto threeHitMask = threeHitDisc > m_LD3Hits.value(); // -- only have 3 or 4 hit tracks mvaMask = ( fourHitTrack && fourHitMask ) || ( !fourHitTrack && threeHitMask ); } const auto pPTMask = p > m_minMomentumFinal.value() && pt > m_minPTFinal.value(); - const auto loopMask = simd::loop_mask( t, pSize ); - const auto validTrackMask = pPTMask && !fiducialMask && mvaMask && loopMask; + const auto loopMask = pTrack.loop_mask(); + const auto validTrackMask = pPTMask && !fiducialMask && mvaMask && loopMask && !pTrack.isInvalid(); const auto finalMask = m_filterMode ? loopMask : validTrackMask; const auto finalQoP = select( validTrackMask, qop, nanMomentum ); - const auto ancestor = protoTracks.ancestorIndex( t ); + const auto ancestor = pTrack.template field().get(); const auto velo_ancestor = velozipped.gather( ancestor, finalMask ); const auto currentsize = outputTracks.size(); - const auto oTrack = outputTracks.compress_back( finalMask ); - - oTrack.field().set( ancestor ); - oTrack.field().setQOverP( finalQoP ); - oTrack.field().setPosition( xUT, yUT, zMidUT ); - oTrack.field().setDirection( txUT, tyUT ); - oTrack.field().resize( velo_ancestor.nHits() ); - + auto oTrack = outputTracks.compress_back( finalMask ); + oTrack.template field().set( ancestor ); + oTrack.template field().setQOverP( finalQoP ); + oTrack.template field().setPosition( xUT, yUT, zMidUT ); + oTrack.template field().setDirection( txUT, ty ); + oTrack.template field().resize( velo_ancestor.nHits() ); for ( int idx = 0; idx < velo_ancestor.nHits().hmax( finalMask ); ++idx ) { - oTrack.field()[idx].field().set( velo_ancestor.vp_index( idx ) ); - oTrack.field()[idx].field().set( velo_ancestor.vp_lhcbID( idx ) ); + oTrack.template field()[idx].template field().set( + velo_ancestor.vp_index( idx ) ); + oTrack.template field()[idx].template field().set( + velo_ancestor.vp_lhcbID( idx ) ); } if ( m_filterMode ) { - oTrack.field().resize( 0 ); + oTrack.template field().resize( 0 ); continue; } @@ -1105,17 +998,17 @@ namespace LHCb::Pr::VeloUT { // -- from here on, go over each track individually to find and add the overlap hits // -- this is not particularly elegant... // -- As before, these are "pseudo layers", i.e. it is not guaranteed that if i > j, z[i] > z[j] - + const auto t = ipTrack * simd::size; + const auto pScalar = protoTracks.scalar(); for ( int iLayer = 0; iLayer < totalUTLayers; ++iLayer ) { int trackIndex2 = 0; for ( unsigned int t2 = 0; t2 < simd::size; ++t2 ) { if ( !testbit( finalMask, t2 ) ) continue; const auto tscalar = t + t2; - - const bool goodHit = ( protoTracks.weight( tscalar, iLayer ).cast() > 0.0001f ); - const auto hitIdx = protoTracks.hitIndex( tscalar, iLayer ); - const auto id = protoTracks.id( tscalar, iLayer ); - + const bool goodHit = + ( pScalar[tscalar].template field( iLayer ).get().cast() > 0.0001f ); + const auto hitIdx = pScalar[tscalar].template field( iLayer ).get(); + const auto id = pScalar[tscalar].template field( iLayer ).get(); // -- Only add the hit, if it is not in an empty layer (that sounds like a tautology, // -- but given that one always has 4 hits, even if only 3 make sense, it is needed) // -- Only the last pseudo-layer can be an empty layer @@ -1133,9 +1026,10 @@ namespace LHCb::Pr::VeloUT { // -- In layers where no hit was found initially, we use the better parametrization of the final // -- track fit to pick up hits that were lost in the initial search // ----------------------------------------------------------------------------------- - const float zhit = goodHit ? protoTracks.z( tscalar, iLayer ).cast() : zMidUT; - const float xhit = goodHit ? protoTracks.x( tscalar, iLayer ).cast() : xArray[t2]; - const int hitContIndex = protoTracks.hitContIndex( tscalar ).cast(); + const auto zhit = goodHit ? pScalar[tscalar].template field( iLayer ).get().cast() : zMidUT; + const auto xhit = + goodHit ? pScalar[tscalar].template field( iLayer ).get().cast() : xArray[t2]; + const auto hitContIndex = pScalar[tscalar].template field().get().cast(); // -- The total sum of all plane codes is: 0 + 1 + 2 + 3 = 6 // -- We can therefore get the plane code of the last pseudo-layer @@ -1150,20 +1044,22 @@ namespace LHCb::Pr::VeloUT { const int begin = hitsInLayers[hitContIndex].layerIndices[pC]; const int end = ( pC == 3 ) ? hitsInLayers[hitContIndex].size() : hitsInLayers[hitContIndex].layerIndices[pC + 1]; - const auto& hitsInL = hitsInLayers[hitContIndex].scalar(); + const auto hitsInL = hitsInLayers[hitContIndex].scalar(); + for ( int index2 = begin; index2 < end; ++index2 ) { const float zohit = hitsInL[index2].z().cast(); if ( essentiallyEqual( zohit, zhit ) ) continue; const float xohit = hitsInL[index2].x().cast(); const float xextrap = xhit + txUTS * ( zohit - zhit ); + if ( xohit - xextrap < -m_overlapTol ) continue; if ( xohit - xextrap > m_overlapTol ) break; - if ( nUTHits[t2] >= int( LHCb::Pr::Upstream::Tracks::MaxUTHits ) ) continue; - const scalar::int_v utidx = hitsInL[index2].index(); - LHCb::LHCbID oid( LHCb::Detector::UT::ChannelID( hitsInL[index2].channelID().cast() ) ); - const scalar::int_v lhcbid = bit_cast( oid.lhcbID() ); + if ( nUTHits[t2] >= static_cast( Upstream::Tracks::MaxUTHits ) ) continue; + const auto utidx = hitsInL[index2].index(); + const auto oid = LHCbID{LHCb::Detector::UT::ChannelID( hitsInL[index2].channelID().cast() )}; + const auto lhcbid = bit_cast( oid.lhcbID() ); auto hits = outputTracks.scalar()[currentsize + trackIndex2].field(); hits.resize( nUTHits[t2] + 1 ); -- GitLab From 7f491d18f1eff0e4537f3c7182949a75f5247924 Mon Sep 17 00:00:00 2001 From: decianm Date: Wed, 5 Feb 2025 20:32:11 +0100 Subject: [PATCH 3/4] tried to solve conflict --- Pr/PrVeloUT/src/PrVeloUT.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Pr/PrVeloUT/src/PrVeloUT.cpp b/Pr/PrVeloUT/src/PrVeloUT.cpp index 71c7f9d4bed..8b1698c893b 100644 --- a/Pr/PrVeloUT/src/PrVeloUT.cpp +++ b/Pr/PrVeloUT/src/PrVeloUT.cpp @@ -1,4 +1,4 @@ -/*****************************************************************************\ +/***************************************************************************** \ * (c) Copyright 2024 CERN for the benefit of the LHCb Collaboration * * * * This software is distributed under the terms of the GNU General Public * -- GitLab From a15bba299b7ba6329c3fde5e7823e6e485bf3357 Mon Sep 17 00:00:00 2001 From: Gitlab CI Date: Wed, 5 Feb 2025 19:33:16 +0000 Subject: [PATCH 4/4] pre-commit fixes patch generated by https://gitlab.cern.ch/lhcb/Rec/-/jobs/50503523 --- Pr/PrVeloUT/src/PrVeloUT.cpp | 156 +++++++++++++++++------------------ 1 file changed, 78 insertions(+), 78 deletions(-) diff --git a/Pr/PrVeloUT/src/PrVeloUT.cpp b/Pr/PrVeloUT/src/PrVeloUT.cpp index 8b1698c893b..d4e1d6fd06f 100644 --- a/Pr/PrVeloUT/src/PrVeloUT.cpp +++ b/Pr/PrVeloUT/src/PrVeloUT.cpp @@ -55,7 +55,7 @@ namespace LHCb::Pr::VeloUT { constexpr auto nanMomentum = std::numeric_limits::quiet_NaN(); constexpr auto totalUTLayers = static_cast( UTInfo::DetectorNumbers::TotalLayers ); // -- Used for the calculation of the size of the search windows - constexpr std::array normFact{0.95f, 1.0f, 1.36f, 1.41f}; + constexpr std::array normFact{ 0.95f, 1.0f, 1.36f, 1.41f }; using LooseBounds = std::array; using NominalBounds = std::array; @@ -110,7 +110,7 @@ namespace LHCb::Pr::VeloUT { }; struct VeloUTGeomCache { - VeloUTGeomCache( const DeUTDetector& det, const UTMagnetTool::Cache& magtoolcache ) : common{det} { + VeloUTGeomCache( const DeUTDetector& det, const UTMagnetTool::Cache& magtoolcache ) : common{ det } { // m_zMidUT is a position of normalization plane which should to be close to z middle of UT ( +- 5 cm ). // Cached once in VeloUTTool at initialization. No need to update with small UT movement. zMidUT = magtoolcache.zCenterUT; @@ -118,8 +118,8 @@ namespace LHCb::Pr::VeloUT { distToMomentum = 1.0f / magtoolcache.dist2mom; } UTDAQ::GeomCache common; - float zMidUT{0.f}; - float distToMomentum{0.f}; + float zMidUT{ 0.f }; + float distToMomentum{ 0.f }; }; simd::mask_v CholeskyDecomposition3( const std::array& mat, std::array& rhs ) { @@ -161,7 +161,7 @@ namespace LHCb::Pr::VeloUT { // -- parameters that describe the z position of the kink point as a function of ty in a 4th order polynomial // (even // terms only) - constexpr auto magFieldParams = std::array{2010.0f, -2240.0f, -71330.f}; + constexpr auto magFieldParams = std::array{ 2010.0f, -2240.0f, -71330.f }; const auto ty = pTrack.template field().ty(); const auto tx = pTrack.template field().tx(); const auto z = pTrack.template field().z(); @@ -370,14 +370,14 @@ namespace LHCb::Pr::VeloUT { public: PrVeloUT( const std::string& name, ISvcLocator* pSvcLocator ) : Transformer( name, pSvcLocator, - {KeyValue{"InputTracksName", "Rec/Track/Velo"}, KeyValue{"UTHits", UTInfo::HitLocation}, - KeyValue{"GeometryInfo", "AlgorithmSpecific-" + name + "-UTGeometryInfo"}, - KeyValue{"Magnet", Det::Magnet::det_path}}, - KeyValue{"OutputTracksName", "Rec/Track/UT"} ) {} + { KeyValue{ "InputTracksName", "Rec/Track/Velo" }, KeyValue{ "UTHits", UTInfo::HitLocation }, + KeyValue{ "GeometryInfo", "AlgorithmSpecific-" + name + "-UTGeometryInfo" }, + KeyValue{ "Magnet", Det::Magnet::det_path } }, + KeyValue{ "OutputTracksName", "Rec/Track/UT" } ) {} StatusCode initialize() override { return Transformer::initialize().andThen( [&] { - addConditionDerivation( {DeUTDetLocation::location(), m_PrUTMagnetTool->cacheLocation()}, + addConditionDerivation( { DeUTDetLocation::location(), m_PrUTMagnetTool->cacheLocation() }, inputLocation(), []( const DeUTDetector& det, const UTMagnetTool::Cache& magtoolcache ) { return VeloUTGeomCache( det, magtoolcache ); @@ -389,34 +389,34 @@ namespace LHCb::Pr::VeloUT { const DeMagnet& ) const override final; private: - Gaudi::Property m_minMomentum{this, "MinMomentum", 1500.f * Gaudi::Units::MeV}; - Gaudi::Property m_minPT{this, "MinPT", 300.f * Gaudi::Units::MeV}; - Gaudi::Property m_minMomentumFinal{this, "MinMomentumFinal", 2500.f * Gaudi::Units::MeV}; - Gaudi::Property m_minPTFinal{this, "MinPTFinal", 425.f * Gaudi::Units::MeV}; - Gaudi::Property m_maxPseudoChi2{this, "MaxPseudoChi2", 1280.}; - Gaudi::Property m_yTol{this, "YTolerance", 0.5 * Gaudi::Units::mm}; // 0.8 - Gaudi::Property m_yTolSlope{this, "YTolSlope", 0.08}; // 0.2 - Gaudi::Property m_hitTol{this, "HitTol", 0.8 * Gaudi::Units::mm}; // 0.8 - Gaudi::Property m_deltaTx{this, "DeltaTx", 0.018}; // 0.02 - Gaudi::Property m_maxXSlope{this, "MaxXSlope", 0.350}; - Gaudi::Property m_maxYSlope{this, "MaxYSlope", 0.300}; - Gaudi::Property m_centralHoleSize{this, "CentralHoleSize", 33. * Gaudi::Units::mm}; - Gaudi::Property m_intraLayerDist{this, "IntraLayerDist", 15.0 * Gaudi::Units::mm}; - Gaudi::Property m_overlapTol{this, "OverlapTol", 0.5 * Gaudi::Units::mm}; - Gaudi::Property m_passHoleSize{this, "PassHoleSize", 40. * Gaudi::Units::mm}; - Gaudi::Property m_LD3Hits{this, "LD3HitsMin", -0.5}; - Gaudi::Property m_LD4Hits{this, "LD4HitsMin", -0.5}; - Gaudi::Property m_minLayers{this, "MinLayers", 3}; - - Gaudi::Property m_printVariables{this, "PrintVariables", false}; - Gaudi::Property m_passTracks{this, "PassTracks", false}; - Gaudi::Property m_finalFit{this, "FinalFit", true}; - Gaudi::Property m_fiducialCuts{this, "FiducialCuts", true}; - Gaudi::Property m_filterMode{this, "FilterMode", false}; - Gaudi::Property m_looseSectorSearch{this, "LooseSectorSearch", false}; - - mutable Gaudi::Accumulators::SummingCounter m_seedsCounter{this, "#seeds"}; - mutable Gaudi::Accumulators::SummingCounter m_tracksCounter{this, "#tracks"}; + Gaudi::Property m_minMomentum{ this, "MinMomentum", 1500.f * Gaudi::Units::MeV }; + Gaudi::Property m_minPT{ this, "MinPT", 300.f * Gaudi::Units::MeV }; + Gaudi::Property m_minMomentumFinal{ this, "MinMomentumFinal", 2500.f * Gaudi::Units::MeV }; + Gaudi::Property m_minPTFinal{ this, "MinPTFinal", 425.f * Gaudi::Units::MeV }; + Gaudi::Property m_maxPseudoChi2{ this, "MaxPseudoChi2", 1280. }; + Gaudi::Property m_yTol{ this, "YTolerance", 0.5 * Gaudi::Units::mm }; // 0.8 + Gaudi::Property m_yTolSlope{ this, "YTolSlope", 0.08 }; // 0.2 + Gaudi::Property m_hitTol{ this, "HitTol", 0.8 * Gaudi::Units::mm }; // 0.8 + Gaudi::Property m_deltaTx{ this, "DeltaTx", 0.018 }; // 0.02 + Gaudi::Property m_maxXSlope{ this, "MaxXSlope", 0.350 }; + Gaudi::Property m_maxYSlope{ this, "MaxYSlope", 0.300 }; + Gaudi::Property m_centralHoleSize{ this, "CentralHoleSize", 33. * Gaudi::Units::mm }; + Gaudi::Property m_intraLayerDist{ this, "IntraLayerDist", 15.0 * Gaudi::Units::mm }; + Gaudi::Property m_overlapTol{ this, "OverlapTol", 0.5 * Gaudi::Units::mm }; + Gaudi::Property m_passHoleSize{ this, "PassHoleSize", 40. * Gaudi::Units::mm }; + Gaudi::Property m_LD3Hits{ this, "LD3HitsMin", -0.5 }; + Gaudi::Property m_LD4Hits{ this, "LD4HitsMin", -0.5 }; + Gaudi::Property m_minLayers{ this, "MinLayers", 3 }; + + Gaudi::Property m_printVariables{ this, "PrintVariables", false }; + Gaudi::Property m_passTracks{ this, "PassTracks", false }; + Gaudi::Property m_finalFit{ this, "FinalFit", true }; + Gaudi::Property m_fiducialCuts{ this, "FiducialCuts", true }; + Gaudi::Property m_filterMode{ this, "FilterMode", false }; + Gaudi::Property m_looseSectorSearch{ this, "LooseSectorSearch", false }; + + mutable Gaudi::Accumulators::SummingCounter m_seedsCounter{ this, "#seeds" }; + mutable Gaudi::Accumulators::SummingCounter m_tracksCounter{ this, "#tracks" }; LHCb::UT::TrackUtils::MiniStates getStates( const Velo::Tracks& inputTracks, Upstream::Tracks& outputTracks, float zMidUT ) const; @@ -438,11 +438,11 @@ namespace LHCb::Pr::VeloUT { const BdlTable& bdlTable, const DeMagnet& magnet, float zMidUT ) const; /// Multipurpose tool for Bdl and deflection - ToolHandle m_PrUTMagnetTool{this, "PrUTMagnetTool", "PrUTMagnetTool"}; + ToolHandle m_PrUTMagnetTool{ this, "PrUTMagnetTool", "PrUTMagnetTool" }; - constexpr static float c_zKink{1780.0}; - constexpr static float c_sigmaVeloSlope{0.10 * Gaudi::Units::mrad}; - constexpr static float c_invSigmaVeloSlope{10.0 / Gaudi::Units::mrad}; + constexpr static float c_zKink{ 1780.0 }; + constexpr static float c_sigmaVeloSlope{ 0.10 * Gaudi::Units::mrad }; + constexpr static float c_invSigmaVeloSlope{ 10.0 / Gaudi::Units::mrad }; }; // Declaration of the Algorithm Factory @@ -474,8 +474,8 @@ namespace LHCb::Pr::VeloUT { abs( velogeom.distToMomentum * invMinMom * normFact[layerIndex] ) - abs( tx ) * m_intraLayerDist.value(); const auto yTol = m_yTol.value() + m_yTolSlope.value() * xTol; - const auto zGeo{geometry.layers[layerIndex].z}; - const auto dxDy{geometry.layers[layerIndex].dxDy}; + const auto zGeo{ geometry.layers[layerIndex].z }; + const auto dxDy{ geometry.layers[layerIndex].dxDy }; const auto yAtZ = y + ty * ( zGeo - z ); const auto xLayer = x + tx * ( zGeo - z ); @@ -506,7 +506,7 @@ namespace LHCb::Pr::VeloUT { hitsInLayers.emplace_back( Zipping::generateZipIdentifier(), LHCb::getMemResource( evtCtx ) ).reserve( 32 ); } - auto pTracks = ProtoTracks{Zipping::generateZipIdentifier(), LHCb::getMemResource( evtCtx )}; + auto pTracks = ProtoTracks{ Zipping::generateZipIdentifier(), LHCb::getMemResource( evtCtx ) }; pTracks.reserve( inputTracks.size() ); for ( auto fState : filteredStates.scalar() ) { @@ -519,10 +519,10 @@ namespace LHCb::Pr::VeloUT { hitsInLayers[hitContainerIdx].clear(); hitsInLayers[hitContainerIdx].layerIndices.fill( -1 ); if ( !m_looseSectorSearch ? getHitsScalar( - {x, y, z, tx, ty}, fState.offset(), velogeom.zMidUT, m_minLayers, hh, + { x, y, z, tx, ty }, fState.offset(), velogeom.zMidUT, m_minLayers, hh, std::get( compBoundsArray ), hitsInLayers[hitContainerIdx] ) : getHitsScalar( - {x, y, z, tx, ty}, fState.offset(), velogeom.zMidUT, m_minLayers, hh, + { x, y, z, tx, ty }, fState.offset(), velogeom.zMidUT, m_minLayers, hh, std::get( compBoundsArray ), hitsInLayers[hitContainerIdx] ) ) { const auto ancestorIndex = fState.get(); const auto zDiff = c_zKink - z; @@ -536,7 +536,7 @@ namespace LHCb::Pr::VeloUT { pTrack.field().set( ancestorIndex ); pTrack.field().set( m_maxPseudoChi2 ); pTrack.field().set( hitContainerIdx ); - for ( auto i{0}; i < totalUTLayers; ++i ) { pTrack.field( i ).set( -1 ); } + for ( auto i{ 0 }; i < totalUTLayers; ++i ) { pTrack.field( i ).set( -1 ); } } } @@ -551,7 +551,7 @@ namespace LHCb::Pr::VeloUT { // -- Note: These are not "physical" layers, as the hits are ordered such that only // -- the last one can be not filled. auto hitsInL = hitsInLayers[hitsOffset].scalar(); - auto nValid{0}; + auto nValid{ 0 }; if ( hitsInL.size() != 0 ) { for ( auto i = 0; i < totalUTLayers; ++i ) { auto hitI = pTrack.field( i ).get().cast(); @@ -559,7 +559,7 @@ namespace LHCb::Pr::VeloUT { const auto weight = valid ? hitsInL[hitI].weight().cast() : 0.0f; nValid += valid; hitI = std::max( 0, hitI ); - const auto id = LHCbID{LHCb::Detector::UT::ChannelID( hitsInL[hitI].channelID().cast() )}; + const auto id = LHCbID{ LHCb::Detector::UT::ChannelID( hitsInL[hitI].channelID().cast() ) }; pTrack.field( i ).set( weight ); pTrack.field( i ).set( hitsInL[hitI].x() ); pTrack.field( i ).set( hitsInL[hitI].z() ); @@ -664,10 +664,10 @@ namespace LHCb::Pr::VeloUT { int nLayers = 0; // -- the protos could be precomputed - const simd::float_v yProto{yState - tyState * zState}; - const simd::float_v xOnTrackProto{xState - txState * zState}; - const simd::float_v ty{tyState}; - const simd::float_v tx{txState}; + const simd::float_v yProto{ yState - tyState * zState }; + const simd::float_v xOnTrackProto{ xState - txState * zState }; + const simd::float_v ty{ tyState }; + const simd::float_v tx{ txState }; // -- the second condition is to ensure at least 3 layers with hits for ( int layerIndex = 0; layerIndex < totalUTLayers && layerIndex - nLayers <= totalUTLayers - minLayers; ++layerIndex ) { @@ -839,7 +839,7 @@ namespace LHCb::Pr::VeloUT { } // -- All hits found if ( bestHit1Index != -1 && bestHit3Index != -1 ) { - simpleFit( std::array{i0, bestHit1Index, i2, bestHit3Index}, hitsInLayers, pTrack, zMidUT, c_zKink, + simpleFit( std::array{ i0, bestHit1Index, i2, bestHit3Index }, hitsInLayers, pTrack, zMidUT, c_zKink, c_invSigmaVeloSlope ); if ( !fourLayerSolution && !pTrack.isInvalid() ) { fourLayerSolution = true; } @@ -848,12 +848,12 @@ namespace LHCb::Pr::VeloUT { // -- Nothing found in layer 3 if ( !fourLayerSolution && bestHit1Index != -1 ) { - simpleFit( std::array{i0, bestHit1Index, i2}, hitsInLayers, pTrack, zMidUT, c_zKink, c_invSigmaVeloSlope ); + simpleFit( std::array{ i0, bestHit1Index, i2 }, hitsInLayers, pTrack, zMidUT, c_zKink, c_invSigmaVeloSlope ); continue; } // -- Noting found in layer 1 if ( !fourLayerSolution && bestHit3Index != -1 ) { - simpleFit( std::array{i0, bestHit3Index, i2}, hitsInLayers, pTrack, zMidUT, c_zKink, c_invSigmaVeloSlope ); + simpleFit( std::array{ i0, bestHit3Index, i2 }, hitsInLayers, pTrack, zMidUT, c_zKink, c_invSigmaVeloSlope ); continue; } } @@ -882,23 +882,23 @@ namespace LHCb::Pr::VeloUT { // -- FIXME: these rely on the internal details of PrTableForFunction!!! // and should at least be put back in there, and used from here // to make sure everything _stays_ consistent... - auto var = std::array{ty, zOrigin, z}; - auto index1 = min( max( simd::int_v{( var[0] + 0.3f ) / 0.6f * 30}, 0 ), 30 ); - auto index2 = min( max( simd::int_v{( var[1] + 250 ) / 500 * 10}, 0 ), 10 ); - auto index3 = min( max( simd::int_v{var[2] / 800 * 10}, 0 ), 10 ); + auto var = std::array{ ty, zOrigin, z }; + auto index1 = min( max( simd::int_v{ ( var[0] + 0.3f ) / 0.6f * 30 }, 0 ), 30 ); + auto index2 = min( max( simd::int_v{ ( var[1] + 250 ) / 500 * 10 }, 0 ), 10 ); + auto index3 = min( max( simd::int_v{ var[2] / 800 * 10 }, 0 ), 10 ); auto bdl = gather( bdlTable.table().data(), masterIndexSIMD( index1, index2, index3 ) ); // -- TODO: check if we can go outside this table... - const auto bdls = std::array{gather( bdlTable.table().data(), masterIndexSIMD( index1 + 1, index2, index3 ) ), - gather( bdlTable.table().data(), masterIndexSIMD( index1, index2 + 1, index3 ) ), - gather( bdlTable.table().data(), masterIndexSIMD( index1, index2, index3 + 1 ) )}; + const auto bdls = std::array{ gather( bdlTable.table().data(), masterIndexSIMD( index1 + 1, index2, index3 ) ), + gather( bdlTable.table().data(), masterIndexSIMD( index1, index2 + 1, index3 ) ), + gather( bdlTable.table().data(), masterIndexSIMD( index1, index2, index3 + 1 ) ) }; - constexpr auto minValsBdl = std::array{-0.3f, -250.0f, 0.0f}; - constexpr auto maxValsBdl = std::array{0.3f, 250.0f, 800.0f}; - constexpr auto deltaBdl = std::array{0.02f, 50.0f, 80.0f}; + constexpr auto minValsBdl = std::array{ -0.3f, -250.0f, 0.0f }; + constexpr auto maxValsBdl = std::array{ 0.3f, 250.0f, 800.0f }; + constexpr auto deltaBdl = std::array{ 0.02f, 50.0f, 80.0f }; const auto boundaries = - std::array{-0.3f + simd::float_v{index1} * deltaBdl[0], -250.0f + simd::float_v{index2} * deltaBdl[1], - 0.0f + simd::float_v{index3} * deltaBdl[2]}; + std::array{ -0.3f + simd::float_v{ index1 } * deltaBdl[0], -250.0f + simd::float_v{ index2 } * deltaBdl[1], + 0.0f + simd::float_v{ index3 } * deltaBdl[2] }; // -- This is an interpolation, to get a bit more precision simd::float_v addBdlVal{ 0.0f }; @@ -914,18 +914,18 @@ namespace LHCb::Pr::VeloUT { // ---- // -- order is: x, tx, y, chi2 - auto finalParams = std::array{pTrack.template field().get(), - pTrack.template field().get(), y + ty * ( zMidUT - z ), - pTrack.template field().get()}; + auto finalParams = std::array{ pTrack.template field().get(), + pTrack.template field().get(), y + ty * ( zMidUT - z ), + pTrack.template field().get() }; const auto qpxz2p = -1.0f / bdl * 3.3356f / Gaudi::Units::GeV; auto fitMask = simd::mask_true(); auto qp = m_finalFit - ? fastfitterSIMD( finalParams, pTrack, zMidUT, qpxz2p, fitMask ) - : pTrack.template field().get() / sqrt( 1.0f + ty * ty ); // is this correct? + ? fastfitterSIMD( finalParams, pTrack, zMidUT, qpxz2p, fitMask ) + : pTrack.template field().get() / sqrt( 1.0f + ty * ty ); // is this correct? qp = select( fitMask, qp, pTrack.template field().get() ); - const auto qop = select( abs( bdl ) < 1.e-8f, simd::float_v{1000.0f}, qp * qpxz2p ); + const auto qop = select( abs( bdl ) < 1.e-8f, simd::float_v{ 1000.0f }, qp * qpxz2p ); // -- Don't make tracks that have grossly too low momentum // -- Beware of the momentum resolution! @@ -952,8 +952,8 @@ namespace LHCb::Pr::VeloUT { // -- the values only make sense if the final fit is performed auto mvaMask = simd::mask_true(); if ( const auto fourHitTrack = pTrack.template field( 3 ).get() > 0.0001f; m_finalFit ) { - const auto fourHitDisc = evaluateLinearDiscriminantSIMD<4>( {p, pt, finalParams[3]} ); - const auto threeHitDisc = evaluateLinearDiscriminantSIMD<3>( {p, pt, finalParams[3]} ); + const auto fourHitDisc = evaluateLinearDiscriminantSIMD<4>( { p, pt, finalParams[3] } ); + const auto threeHitDisc = evaluateLinearDiscriminantSIMD<3>( { p, pt, finalParams[3] } ); const auto fourHitMask = fourHitDisc > m_LD4Hits.value(); const auto threeHitMask = threeHitDisc > m_LD3Hits.value(); // -- only have 3 or 4 hit tracks @@ -1058,7 +1058,7 @@ namespace LHCb::Pr::VeloUT { if ( nUTHits[t2] >= static_cast( Upstream::Tracks::MaxUTHits ) ) continue; const auto utidx = hitsInL[index2].index(); - const auto oid = LHCbID{LHCb::Detector::UT::ChannelID( hitsInL[index2].channelID().cast() )}; + const auto oid = LHCbID{ LHCb::Detector::UT::ChannelID( hitsInL[index2].channelID().cast() ) }; const auto lhcbid = bit_cast( oid.lhcbID() ); auto hits = outputTracks.scalar()[currentsize + trackIndex2].field(); -- GitLab