Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion GPU/GPUTracking/DataTypes/CalibdEdxContainer.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -265,7 +265,7 @@ void CalibdEdxContainer::setDefaultZeroSupresssionThreshold()
mThresholdMap.setMinCorrectionFactor(defaultVal - 0.1f);
mThresholdMap.setMaxCorrectionFactor(defaultVal + 0.1f);
for (int32_t sector = 0; sector < o2::tpc::constants::MAXSECTOR; ++sector) {
for (uint16_t globPad = 0; globPad < TPC_PADS_IN_SECTOR; ++globPad) {
for (uint16_t globPad = 0; globPad < TPC_REAL_PADS_IN_SECTOR; ++globPad) {
mThresholdMap.setGainCorrection(sector, globPad, defaultVal);
}
}
Expand Down
2 changes: 1 addition & 1 deletion GPU/GPUTracking/DataTypes/TPCPadBitMap.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ TPCPadBitMap::TPCPadBitMap(const o2::tpc::CalDet<bool>& map) : TPCPadBitMap()
void TPCPadBitMap::setFromMap(const o2::tpc::CalDet<bool>& map)
{
for (int32_t sector = 0; sector < o2::tpc::constants::MAXSECTOR; sector++) {
for (int32_t p = 0; p < TPC_PADS_IN_SECTOR; p++) {
for (int32_t p = 0; p < TPC_REAL_PADS_IN_SECTOR; p++) {
const auto val = map.getValue(sector, p);
mBitMap[sector].set(p, val);
}
Expand Down
2 changes: 1 addition & 1 deletion GPU/GPUTracking/DataTypes/TPCPadBitMap.h
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ struct TPCPadBitMap {
{
public:
using T = uint32_t;
static constexpr int32_t NWORDS = (TPC_PADS_IN_SECTOR + sizeof(T) * 8 - 1) / sizeof(T);
static constexpr int32_t NWORDS = (TPC_REAL_PADS_IN_SECTOR + sizeof(T) * 8 - 1) / sizeof(T);
GPUdi() SectorBitMap()
{
reset();
Expand Down
2 changes: 1 addition & 1 deletion GPU/GPUTracking/DataTypes/TPCPadGainCalib.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ TPCPadGainCalib::TPCPadGainCalib(const o2::tpc::CalDet<float>& gainMap, const fl
void TPCPadGainCalib::setFromMap(const o2::tpc::CalDet<float>& gainMap, const bool inv)
{
for (int32_t sector = 0; sector < o2::tpc::constants::MAXSECTOR; sector++) {
for (int32_t p = 0; p < TPC_PADS_IN_SECTOR; p++) {
for (int32_t p = 0; p < TPC_REAL_PADS_IN_SECTOR; p++) {
const float gainVal = gainMap.getValue(sector, p);
inv ? mGainCorrection[sector].set(p, (gainVal > 1.e-5f) ? 1.f / gainVal : 1.f) : mGainCorrection[sector].set(p, gainVal);
}
Expand Down
6 changes: 3 additions & 3 deletions GPU/GPUTracking/DataTypes/TPCPadGainCalib.h
Original file line number Diff line number Diff line change
Expand Up @@ -120,12 +120,14 @@ struct TPCPadGainCalib {

GPUd() void reset()
{
for (uint16_t p = 0; p < TPC_PADS_IN_SECTOR; p++) {
for (uint16_t p = 0; p < TPC_REAL_PADS_IN_SECTOR; p++) {
set(p, 1.0f);
}
}

private:
T mGainCorrection[TPC_REAL_PADS_IN_SECTOR];

GPUd() T pack(float f) const
{
f = CAMath::Clamp(f, mMinCorrectionFactor, mMaxCorrectionFactor);
Expand All @@ -140,8 +142,6 @@ struct TPCPadGainCalib {
return mMinCorrectionFactor + (mMaxCorrectionFactor - mMinCorrectionFactor) * float(c) / float(NumOfSteps);
}

T mGainCorrection[TPC_PADS_IN_SECTOR];

GPUdi() T& at(uint16_t globalPad)
{
return mGainCorrection[globalPad];
Expand Down
2 changes: 1 addition & 1 deletion GPU/GPUTracking/DataTypes/TPCZSLinkMapping.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ TPCZSLinkMapping::TPCZSLinkMapping(o2::tpc::Mapper& mapper)
assert(fecToGlobalPad.size() == TPC_FEC_IDS_IN_SECTOR);

const auto& globalPadToPadPos = mapper.getMapGlobalPadToPadPos();
assert(globalPadToPadPos.size() == TPC_PADS_IN_SECTOR);
assert(globalPadToPadPos.size() == TPC_REAL_PADS_IN_SECTOR);

for (size_t i = 0; i < TPC_FEC_IDS_IN_SECTOR; i++) {
FECIDToPadPos[i] = globalPadToPadPos[fecToGlobalPad[i]];
Expand Down
16 changes: 10 additions & 6 deletions GPU/GPUTracking/Definitions/clusterFinderDefs.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,15 +32,19 @@
#endif

// Padding of 2 and 3 respectively would be enough. But this ensures that
// rows are always aligned along cache lines. Likewise for TPC_PADS_PER_ROW.
// rows are always aligned along cache lines. Likewise for TPC_CLUSTERER_ROW_PAD_CAPACITY.
#define GPUCF_PADDING_PAD 8
#define GPUCF_PADDING_TIME 4
#define TPC_PADS_PER_ROW 144
// Largest possible number of pads in a TPC row
#define TPC_CLUSTERER_ROW_PAD_CAPACITY 144

#define TPC_ROWS_PER_CRU 18
#define TPC_PADS_PER_ROW_PADDED (TPC_PADS_PER_ROW + GPUCF_PADDING_PAD)
#define TPC_NUM_OF_PADS (GPUCA_ROW_COUNT * TPC_PADS_PER_ROW_PADDED + GPUCF_PADDING_PAD)
#define TPC_PADS_IN_SECTOR 14560
// Stride between rows as stored internally by the clusterizer
#define TPC_CLUSTERER_ROW_STRIDE (TPC_CLUSTERER_ROW_PAD_CAPACITY + GPUCF_PADDING_PAD)
// Number of pads in a sector as stored internally by the clusterizer.
// This includes fake pads for constant strides between rows
#define TPC_CLUSTERER_STRIDED_PAD_COUNT (GPUCA_ROW_COUNT * TPC_CLUSTERER_ROW_STRIDE + GPUCF_PADDING_PAD)
// Real number of pads in a sector
#define TPC_REAL_PADS_IN_SECTOR 14560
#define TPC_FEC_IDS_IN_SECTOR 23296
#define TPC_MAX_FRAGMENT_LEN_GPU 4000
#define TPC_MAX_FRAGMENT_LEN_HOST 1000
Expand Down
4 changes: 2 additions & 2 deletions GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -861,7 +861,7 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
runKernel<GPUMemClean16>({GetGridAutoStep(lane, RecoStep::TPCClusterFinding)}, clustererShadow.mPchargeMap, TPCMapMemoryLayout<ChargeMapType>::items(GetProcessingSettings().overrideClusterizerFragmentLen) * sizeof(ChargeMapType));
runKernel<GPUMemClean16>({GetGridAutoStep(lane, RecoStep::TPCClusterFinding)}, clustererShadow.mPpeakMap, TPCMapMemoryLayout<PeakMapType>::items(GetProcessingSettings().overrideClusterizerFragmentLen) * sizeof(PeakMapType));
if (fragment.index == 0) {
runKernel<GPUMemClean16>({GetGridAutoStep(lane, RecoStep::TPCClusterFinding)}, clustererShadow.mPpadIsNoisy, TPC_PADS_IN_SECTOR * sizeof(*clustererShadow.mPpadIsNoisy));
runKernel<GPUMemClean16>({GetGridAutoStep(lane, RecoStep::TPCClusterFinding)}, clustererShadow.mPpadIsNoisy, TPC_CLUSTERER_STRIDED_PAD_COUNT * sizeof(*clustererShadow.mPpadIsNoisy));
}
DoDebugAndDump(RecoStep::TPCClusterFinding, GPUChainTrackingDebugFlags::TPCClustererZeroedCharges, clusterer, &GPUTPCClusterFinder::DumpChargeMap, *mDebugFile, "Zeroed Charges");

Expand Down Expand Up @@ -965,7 +965,7 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
const int32_t nBlocks = GPUTPCCFCheckPadBaseline::GetNBlocks(doGPU);

runKernel<GPUTPCCFCheckPadBaseline>({GetGridBlk(nBlocks, lane), {iSector}});
getKernelTimer<GPUTPCCFCheckPadBaseline>(RecoStep::TPCClusterFinding, iSector, TPC_PADS_IN_SECTOR * fragment.lengthWithoutOverlap() * sizeof(PackedCharge), false);
getKernelTimer<GPUTPCCFCheckPadBaseline>(RecoStep::TPCClusterFinding, iSector, TPC_REAL_PADS_IN_SECTOR * fragment.lengthWithoutOverlap() * sizeof(PackedCharge), false);
}

runKernel<GPUTPCCFPeakFinder>({GetGrid(clusterer.mPmemory->counters.nPositions, lane), {iSector}});
Expand Down
8 changes: 4 additions & 4 deletions GPU/GPUTracking/TPCClusterFinder/CfArray2D.h
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ class TilingLayout
enum {
Height = Grid::Height,
Width = Grid::Width,
WidthInTiles = (TPC_NUM_OF_PADS + Width - 1) / Width,
WidthInTiles = (TPC_CLUSTERER_STRIDED_PAD_COUNT + Width - 1) / Width,
};

GPUdi() static tpccf::SizeT idx(const CfChargePos& p)
Expand All @@ -65,7 +65,7 @@ class TilingLayout

GPUd() static size_t items(size_t fragmentLen)
{
return (TPC_NUM_OF_PADS + Width - 1) / Width * Width * (TPC_MAX_FRAGMENT_LEN_PADDED(fragmentLen) + Height - 1) / Height * Height;
return (TPC_CLUSTERER_STRIDED_PAD_COUNT + Width - 1) / Width * Width * (TPC_MAX_FRAGMENT_LEN_PADDED(fragmentLen) + Height - 1) / Height * Height;
}
};

Expand All @@ -74,12 +74,12 @@ class LinearLayout
public:
GPUdi() static tpccf::SizeT idx(const CfChargePos& p)
{
return TPC_NUM_OF_PADS * p.timePadded + p.gpad;
return TPC_CLUSTERER_STRIDED_PAD_COUNT * p.timePadded + p.gpad;
}

GPUd() static size_t items(size_t fragmentLen)
{
return TPC_NUM_OF_PADS * TPC_MAX_FRAGMENT_LEN_PADDED(fragmentLen);
return TPC_CLUSTERER_STRIDED_PAD_COUNT * TPC_MAX_FRAGMENT_LEN_PADDED(fragmentLen);
}
};

Expand Down
6 changes: 3 additions & 3 deletions GPU/GPUTracking/TPCClusterFinder/CfChargePos.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,8 @@ struct CfChargePos {

GPUdi() bool valid() const { return timePadded >= 0; }

GPUdi() tpccf::Row row() const { return gpad / TPC_PADS_PER_ROW_PADDED; }
GPUdi() tpccf::Pad pad() const { return gpad % TPC_PADS_PER_ROW_PADDED - GPUCF_PADDING_PAD; }
GPUdi() tpccf::Row row() const { return gpad / TPC_CLUSTERER_ROW_STRIDE; }
GPUdi() tpccf::Pad pad() const { return gpad % TPC_CLUSTERER_ROW_STRIDE - GPUCF_PADDING_PAD; }
GPUdi() tpccf::TPCFragmentTime time() const { return timePadded - GPUCF_PADDING_TIME; }
GPUdi() tpccf::TPCFragmentTime globalTime() const { return timePadded; }

Expand All @@ -52,7 +52,7 @@ struct CfChargePos {
// index between 0 and TPC_CLUSTERER_STRIDED_PAD_COUNT.
static constexpr GPUdi() tpccf::GlobalPad tpcGlobalPadIdx(tpccf::Row row, tpccf::Pad pad)
{
return TPC_PADS_PER_ROW_PADDED * row + pad + GPUCF_PADDING_PAD;
return TPC_CLUSTERER_ROW_STRIDE * row + pad + GPUCF_PADDING_PAD;
}
};

Expand Down
53 changes: 10 additions & 43 deletions GPU/GPUTracking/TPCClusterFinder/GPUTPCCFCheckPadBaseline.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -50,8 +50,10 @@ GPUd() void GPUTPCCFCheckPadBaseline::CheckBaselineGPU(int32_t nBlocks, int32_t
const CfFragment& fragment = clusterer.mPmemory->fragment;
CfArray2D<PackedCharge> chargeMap(reinterpret_cast<PackedCharge*>(clusterer.mPchargeMap));

constexpr GPUTPCGeometry geo;

const auto iRow = iBlock;
const auto rowinfo = GetRowInfo(iRow);
const auto nPads = geo.NPads(iRow);
const CfChargePos basePos{(Row)iRow, 0, 0};

int32_t totalCharges = 0;
Expand All @@ -62,7 +64,7 @@ GPUd() void GPUTPCCFCheckPadBaseline::CheckBaselineGPU(int32_t nBlocks, int32_t
const int16_t iPadOffset = iThread % MaxNPadsPerRow;
const int16_t iTimeOffset = iThread / MaxNPadsPerRow;
const int16_t iPadHandle = iThread;
const bool handlePad = iPadHandle < rowinfo.nPads;
const bool handlePad = iPadHandle < nPads;

const auto firstTB = fragment.firstNonOverlapTimeBin();
const auto lastTB = fragment.lastNonOverlapTimeBin();
Expand All @@ -73,7 +75,7 @@ GPUd() void GPUTPCCFCheckPadBaseline::CheckBaselineGPU(int32_t nBlocks, int32_t

const CfChargePos pos = basePos.delta({iPadOffset, iTime});

smem.charges[iTimeOffset][iPadOffset] = iTime < lastTB && iPadOffset < rowinfo.nPads ? chargeMap[pos].unpack() : 0;
smem.charges[iTimeOffset][iPadOffset] = iTime < lastTB && iPadOffset < nPads ? chargeMap[pos].unpack() : 0;

GPUbarrier();

Expand All @@ -91,7 +93,7 @@ GPUd() void GPUTPCCFCheckPadBaseline::CheckBaselineGPU(int32_t nBlocks, int32_t
}

if (handlePad) {
updatePadBaseline(rowinfo.globalPadOffset + iPadOffset, clusterer, totalCharges, maxConsecCharges, maxCharge);
updatePadBaseline(basePos.gpad + iPadHandle, clusterer, totalCharges, maxConsecCharges, maxCharge);
}
#endif
}
Expand All @@ -102,11 +104,10 @@ GPUd() void GPUTPCCFCheckPadBaseline::CheckBaselineCPU(int32_t nBlocks, int32_t
const CfFragment& fragment = clusterer.mPmemory->fragment;
CfArray2D<PackedCharge> chargeMap(reinterpret_cast<PackedCharge*>(clusterer.mPchargeMap));

int32_t basePad = iBlock * PadsPerCacheline;
int32_t padsPerRow;
CfChargePos basePos = padToCfChargePos<PadsPerCacheline>(basePad, clusterer, padsPerRow);
CfChargePos basePos(iBlock * PadsPerCacheline, 0);

if (not basePos.valid()) {
constexpr GPUTPCGeometry geo;
if (basePos.pad() >= geo.NPads(basePos.row())) {
return;
}

Expand Down Expand Up @@ -153,45 +154,11 @@ GPUd() void GPUTPCCFCheckPadBaseline::CheckBaselineCPU(int32_t nBlocks, int32_t
}

for (tpccf::Pad localpad = 0; localpad < PadsPerCacheline; localpad++) {
updatePadBaseline(basePad + localpad, clusterer, totalCharges[localpad], maxConsecCharges[localpad], maxCharge[localpad]);
updatePadBaseline(basePos.gpad + localpad, clusterer, totalCharges[localpad], maxConsecCharges[localpad], maxCharge[localpad]);
}
#endif
}

// Translates a sector-wide pad index, counted consecutively over the rows
// using geo.NPads(), into a CfChargePos.
//
// The pad is first located inside its row and then rounded down to the
// nearest multiple of PadsPerBlock within that row.
//
// pad        [in/out] Pad index to look up. On success it is decreased by the
//            same amount the in-row pad was rounded down by.
// clusterer  Not used by this function.
// padsPerRow [out] Number of pads in the matching row, or 0 if no row
//            contains the pad.
//
// Returns the position of the rounded-down pad with the third field set to 0.
// If the pad lies outside every row, returns CfChargePos{0, 0, INVALID_TIME_BIN}.
template <int32_t PadsPerBlock>
GPUd() CfChargePos GPUTPCCFCheckPadBaseline::padToCfChargePos(int32_t& pad, const GPUTPCClusterFinder& clusterer, int32_t& padsPerRow)
{
  constexpr GPUTPCGeometry geo;

  // Index of the first pad of the row currently being inspected.
  int32_t firstPadOfRow = 0;
  for (Row row = 0; row < GPUCA_ROW_COUNT; row++) {
    const int32_t rowPadCount = geo.NPads(row);
    const int32_t localPad = pad - firstPadOfRow;

    if (localPad < 0 || localPad >= rowPadCount) {
      // Pad does not belong to this row, move on to the next one.
      firstPadOfRow += rowPadCount;
      continue;
    }

    // Distance of the pad from the start of its PadsPerBlock-sized group.
    const int32_t misalignment = localPad % PadsPerBlock;
    pad -= misalignment;
    padsPerRow = rowPadCount;
    return CfChargePos{row, Pad(localPad - misalignment), 0};
  }

  // No row contains the requested pad.
  padsPerRow = 0;
  return CfChargePos{0, 0, INVALID_TIME_BIN};
}

// Builds the RowInfo for the given row.
//
// globalPadOffset is the sum of geo.NPads(r) over all rows r < row, and nPads
// is geo.NPads(row).
GPUd() GPUTPCCFCheckPadBaseline::RowInfo GPUTPCCFCheckPadBaseline::GetRowInfo(int16_t row)
{
  constexpr GPUTPCGeometry geo;

  // Accumulate the pad counts of every row that precedes the requested one.
  int16_t padsBefore = 0;
  int16_t current = 0;
  while (current < row) {
    padsBefore += geo.NPads(current);
    current++;
  }

  return RowInfo{padsBefore, geo.NPads(row)};
}

GPUd() void GPUTPCCFCheckPadBaseline::updatePadBaseline(int32_t pad, const GPUTPCClusterFinder& clusterer, int32_t totalCharges, int32_t consecCharges, Charge maxCharge)
{
const CfFragment& fragment = clusterer.mPmemory->fragment;
Expand Down
11 changes: 1 addition & 10 deletions GPU/GPUTracking/TPCClusterFinder/GPUTPCCFCheckPadBaseline.h
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ class GPUTPCCFCheckPadBaseline : public GPUKernelTemplate

static int32_t GetNBlocks(bool isGPU)
{
const int32_t nBlocks = TPC_PADS_IN_SECTOR / PadsPerCacheline;
const int32_t nBlocks = TPC_CLUSTERER_STRIDED_PAD_COUNT / PadsPerCacheline;
return isGPU ? GPUCA_ROW_COUNT : nBlocks;
}

Expand All @@ -74,15 +74,6 @@ class GPUTPCCFCheckPadBaseline : public GPUKernelTemplate
GPUd() static void CheckBaselineGPU(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& clusterer);
GPUd() static void CheckBaselineCPU(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& clusterer);

template <int32_t PadsPerBlock>
GPUd() static CfChargePos padToCfChargePos(int32_t& pad, const GPUTPCClusterFinder&, int32_t& padsPerRow);

// Per-row pad bookkeeping, as returned by GetRowInfo().
struct RowInfo {
// Sum of the pad counts of all rows preceding this row.
int16_t globalPadOffset;
// Number of pads in this row.
int16_t nPads;
};
GPUd() static RowInfo GetRowInfo(int16_t row);

GPUd() static void updatePadBaseline(int32_t pad, const GPUTPCClusterFinder&, int32_t totalCharges, int32_t consecCharges, tpccf::Charge maxCharge);
};

Expand Down
6 changes: 3 additions & 3 deletions GPU/GPUTracking/TPCClusterFinder/GPUTPCCFPeakFinder.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -105,11 +105,11 @@ GPUd() void GPUTPCCFPeakFinder::findPeaksImpl(int32_t nBlocks, int32_t nThreads,
// For certain configurations dummy work items are added, so the total
// number of work items is divisible by 64.
// These dummy items also compute the last digit but discard the result.
CfChargePos pos = positions[CAMath::Min(idx, (SizeT)(digitnum - 1))];
CfChargePos pos = positions[CAMath::Min<SizeT>(idx, digitnum - 1)];
Charge charge = pos.valid() ? chargeMap[pos].unpack() : Charge(0);

bool hasLostBaseline = padHasLostBaseline[gainCorrection.globalPad(pos.row(), pos.pad())];
charge = (hasLostBaseline) ? 0.f : charge;
bool hasLostBaseline = padHasLostBaseline[pos.gpad];
charge = hasLostBaseline ? 0.f : charge;

uint8_t peak = isPeak(smem, charge, pos, SCRATCH_PAD_SEARCH_N, chargeMap, calib, smem.posBcast, smem.buf);

Expand Down
2 changes: 1 addition & 1 deletion GPU/GPUTracking/TPCClusterFinder/GPUTPCClusterFinder.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ void* GPUTPCClusterFinder::SetPointersOutput(void* mem)

void* GPUTPCClusterFinder::SetPointersScratch(void* mem)
{
computePointerWithAlignment(mem, mPpadIsNoisy, TPC_PADS_IN_SECTOR);
computePointerWithAlignment(mem, mPpadIsNoisy, TPC_CLUSTERER_STRIDED_PAD_COUNT);
computePointerWithAlignment(mem, mPpositions, mNMaxDigitsFragment);
computePointerWithAlignment(mem, mPpeakPositions, mNMaxPeaks);
computePointerWithAlignment(mem, mPfilteredPeakPositions, mNMaxClusters);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ void GPUTPCClusterFinder::DumpChargeMap(std::ostream& out, std::string_view titl

for (TPCFragmentTime i = start; i < end; i++) {
int32_t zeros = 0;
for (GlobalPad j = 0; j < TPC_NUM_OF_PADS; j++) {
for (GlobalPad j = 0; j < TPC_CLUSTERER_STRIDED_PAD_COUNT; j++) {
uint16_t q = map[{j, i}];
zeros += (q == 0);
if (q != 0) {
Expand Down Expand Up @@ -84,7 +84,7 @@ void GPUTPCClusterFinder::DumpPeakMap(std::ostream& out, std::string_view title)
int32_t zeros = 0;

out << i << ":";
for (GlobalPad j = 0; j < TPC_NUM_OF_PADS; j++) {
for (GlobalPad j = 0; j < TPC_CLUSTERER_STRIDED_PAD_COUNT; j++) {
uint8_t q = map[{j, i}];
zeros += (q == 0);
if (q != 0) {
Expand Down