From d0e4e1a82fbd75ef6279ebe76f952d3059437821 Mon Sep 17 00:00:00 2001 From: Huilin Qu Date: Fri, 6 Jul 2018 16:17:25 +0200 Subject: [PATCH] Some improvements and clean-ups. --- .../python/slimming/applyDeepBtagging_cff.py | 6 +- .../PatAlgos/python/tools/jetTools.py | 20 +++-- RecoBTag/DeepBoostedJet/plugins/BuildFile.xml | 2 - .../plugins/DeepBoostedJetTagInfoProducer.cc | 84 +++++-------------- .../plugins/DeepBoostedJetTagsProducer.cc | 35 ++++---- 5 files changed, 56 insertions(+), 91 deletions(-) diff --git a/PhysicsTools/PatAlgos/python/slimming/applyDeepBtagging_cff.py b/PhysicsTools/PatAlgos/python/slimming/applyDeepBtagging_cff.py index b21352852ba5f..d9a7b76cf8b71 100644 --- a/PhysicsTools/PatAlgos/python/slimming/applyDeepBtagging_cff.py +++ b/PhysicsTools/PatAlgos/python/slimming/applyDeepBtagging_cff.py @@ -44,8 +44,8 @@ def applyDeepBtagging( process, postfix="" ) : - # update slimmed jets to include DeepFlavour (keep same name) - # make clone for DeepDoubleB-less slimmed AK8 jets, so output name is preserved + # update slimmed jets to include particle-based deep taggers (keep same name) + # make clone for DeepTags-less slimmed AK8 jets, so output name is preserved addToProcessAndTask('slimmedJetsAK8NoDeepTags', process.slimmedJetsAK8.clone(), process, task) updateJetCollection( process, @@ -114,7 +114,7 @@ def applyDeepBtagging( process, postfix="" ) : printWarning = False ) - # slimmedJetsAK8 with DeepDoubleB (remove DeepDoubleB-less) + # slimmedJetsAK8 with DeepTags (remove DeepTags-less) delattr(process, 'slimmedJetsAK8') addToProcessAndTask('slimmedJetsAK8', getattr(process,'selectedUpdatedPatJetsSlimmedAK8DeepTags'+postfix).clone(), process, task) # delete module not used anymore (slimmedJetsAK8 substitutes) diff --git a/PhysicsTools/PatAlgos/python/tools/jetTools.py b/PhysicsTools/PatAlgos/python/tools/jetTools.py index 0e6a4ae879f4c..ddb3e48f68bd8 100644 --- a/PhysicsTools/PatAlgos/python/tools/jetTools.py +++ b/PhysicsTools/PatAlgos/python/tools/jetTools.py @@ -633,21 +633,29 @@ def setupBTagging(process, jetSource, pfCandidates, explicitJTA, pvSource, svSou if btagInfo == 'pfDeepBoostedJetTagInfos': jetSrcName = jetSource.value().lower() if 'slimmed' in jetSrcName or 'updated' in jetSrcName: - # running on slimmedJetsAK8 or re-applying btag on MiniAOD - update_jets = cms.bool(True) + # case 1: update jets whose daughters are PackedCandidates, e.g., slimmedJetsAK8 + # daughters are links to original PackedCandidates, so NOT scaled by their puppi weights + has_puppi_weighted_daughters = cms.bool(False) puppi_value_map = cms.InputTag("") vertex_associator = cms.InputTag("") else: sys.stderr.write("Warning: running pfDeepBoostedJetTagInfos on %s is not supported yet.\n"%jetSource) - update_jets = cms.bool(False) - puppi_value_map = cms.InputTag("puppi") - vertex_associator = cms.InputTag("primaryVertexAssociation","original") + has_puppi_weighted_daughters = cms.bool(True) + # daughters are the particles used in jet clustering, so already scaled by their puppi weights + if pfCandidates.value() == 'packedPFCandidates': + # case 2: running on new jet collection whose daughters are PackedCandidates (e.g., recluster jets from MiniAOD files) + puppi_value_map = cms.InputTag("") + vertex_associator = cms.InputTag("") + elif pfCandidates.value() == 'particleFlow': + # case 3: running on new jet collection whose daughters are PFCandidates (e.g., cluster jets in RECO/AOD) + puppi_value_map = cms.InputTag("puppi") + vertex_associator = cms.InputTag("primaryVertexAssociation","original") addToProcessAndTask(btagPrefix+btagInfo+labelName+postfix, btag.pfDeepBoostedJetTagInfos.clone( jets = jetSource, vertices = pvSource, secondary_vertices = svSource, - update_jets = update_jets, + has_puppi_weighted_daughters = has_puppi_weighted_daughters, puppi_value_map = puppi_value_map, vertex_associator = vertex_associator, ), diff --git a/RecoBTag/DeepBoostedJet/plugins/BuildFile.xml b/RecoBTag/DeepBoostedJet/plugins/BuildFile.xml index 1a293383576fd..fa7a7cf8bcc61 100644 --- a/RecoBTag/DeepBoostedJet/plugins/BuildFile.xml +++ b/RecoBTag/DeepBoostedJet/plugins/BuildFile.xml @@ -3,10 +3,8 @@ - - diff --git a/RecoBTag/DeepBoostedJet/plugins/DeepBoostedJetTagInfoProducer.cc b/RecoBTag/DeepBoostedJet/plugins/DeepBoostedJetTagInfoProducer.cc index a84abf05b5334..59da6b14529d2 100644 --- a/RecoBTag/DeepBoostedJet/plugins/DeepBoostedJetTagInfoProducer.cc +++ b/RecoBTag/DeepBoostedJet/plugins/DeepBoostedJetTagInfoProducer.cc @@ -44,7 +44,7 @@ class DeepBoostedJetTagInfoProducer : public edm::stream::EDProducer<> void fillParticleFeatures(DeepBoostedJetFeatures &fts, const reco::Jet &jet); void fillSVFeatures(DeepBoostedJetFeatures &fts, const reco::Jet &jet); - const bool update_jets_; + const bool has_puppi_weighted_daughters_; const double jet_radius_; const double min_jet_pt_; const double min_pt_for_track_properties_; @@ -72,10 +72,10 @@ class DeepBoostedJetTagInfoProducer : public edm::stream::EDProducer<> }; DeepBoostedJetTagInfoProducer::DeepBoostedJetTagInfoProducer(const edm::ParameterSet& iConfig) -: update_jets_(iConfig.getParameter("update_jets")) +: has_puppi_weighted_daughters_(iConfig.getParameter("has_puppi_weighted_daughters")) , jet_radius_(iConfig.getParameter("jet_radius")) , min_jet_pt_(iConfig.getParameter("min_jet_pt")) -, min_pt_for_track_properties_(iConfig.getParameter("minPtForTrackProperties")) +, min_pt_for_track_properties_(iConfig.getParameter("min_pt_for_track_properties")) , jet_token_(consumes>(iConfig.getParameter("jets"))) , vtx_token_(consumes(iConfig.getParameter("vertices"))) , sv_token_(consumes(iConfig.getParameter("secondary_vertices"))) @@ -109,16 +109,13 @@ void DeepBoostedJetTagInfoProducer::fillDescriptions(edm::ConfigurationDescripti { // pfDeepBoostedJetTagInfos edm::ParameterSetDescription desc; - // update_jets: - // set to true if applying on existing jet collections (whose daughters are *not* puppi weighted) - // set to false if the jet collection is (re)clustered (whose daughters are puppi weighted) - desc.add("update_jets", true); + desc.add("has_puppi_weighted_daughters", true); desc.add("jet_radius", 0.8); - desc.add("min_jet_pt", 170); - desc.add("minPtForTrackProperties", -1); + desc.add("min_jet_pt", 150); + desc.add("min_pt_for_track_properties", -1); desc.add("vertices", edm::InputTag("offlinePrimaryVertices")); desc.add("secondary_vertices", edm::InputTag("inclusiveCandidateSecondaryVertices")); - desc.add("jets", edm::InputTag("slimmedJetsAK8")); + desc.add("jets", edm::InputTag("ak8PFJetsPuppi")); desc.add("puppi_value_map", edm::InputTag("puppi")); desc.add("vertex_associator", edm::InputTag("primaryVertexAssociation","original")); desc.add>("feature_names", std::vector{ @@ -133,9 +130,6 @@ void DeepBoostedJetTagInfoProducer::fillDescriptions(edm::ConfigurationDescripti "pfcand_isChargedHad", "pfcand_isGamma", "pfcand_isNeutralHad", - "pfcand_pt", - "pfcand_ptrel", - "pfcand_erel", "pfcand_phirel", "pfcand_etarel", "pfcand_deltaR", @@ -159,39 +153,27 @@ void DeepBoostedJetTagInfoProducer::fillDescriptions(edm::ConfigurationDescripti "pfcand_dxydz", "pfcand_dphidxy", "pfcand_dlambdadz", - "pfcand_btagMomentum", - "pfcand_btagEta", "pfcand_btagEtaRel", "pfcand_btagPtRel", - "pfcand_btagPPar", - "pfcand_btagDeltaR", - "pfcand_btagPtRatio", "pfcand_btagPParRatio", "pfcand_btagSip2dVal", "pfcand_btagSip2dSig", "pfcand_btagSip3dVal", "pfcand_btagSip3dSig", "pfcand_btagJetDistVal", - "sv_ptrel", - "sv_erel", "sv_phirel", "sv_etarel", "sv_deltaR", - "sv_pt", "sv_abseta", "sv_mass", "sv_ptrel_log", "sv_erel_log", "sv_pt_log", "sv_ntracks", - "sv_chi2", - "sv_ndf", "sv_normchi2", "sv_dxy", - "sv_dxyerr", "sv_dxysig", "sv_d3d", - "sv_d3derr", "sv_d3dsig", "sv_costhetasvpv", }); @@ -230,19 +212,20 @@ void DeepBoostedJetTagInfoProducer::produce(edm::Event& iEvent, const edm::Event for (std::size_t jet_n = 0; jet_n < jets->size(); jet_n++){ - // reco jet reference (use as much as possible) const auto& jet = jets->at(jet_n); edm::RefToBase jet_ref(jets, jet_n); // create jet features DeepBoostedJetFeatures features; // declare all the feature variables (init as empty vector) - for (const auto &name : feature_names_) features.add(name); - // fill only if above pt threshold - if (jet.pt() > min_jet_pt_){ - fillParticleFeatures(features, jet); - fillSVFeatures(features, jet); - } + for (const auto &name : feature_names_) { features.add(name); } + + // fill values only if above pt threshold and has daughters, otherwise left empty + if (jet.pt() < min_jet_pt_) continue; + if (jet.numberOfDaughters() == 0) continue; + + fillParticleFeatures(features, jet); + fillSVFeatures(features, jet); output_tag_infos->emplace_back(features, jet_ref); } @@ -267,7 +250,7 @@ void DeepBoostedJetTagInfoProducer::fillParticleFeatures(DeepBoostedJetFeatures return pack_cand->puppiWeight(); } else if (reco_cand) { if (use_puppi_value_map_){ return (*puppi_value_map)[cand]; } - else { throw edm::Exception(edm::errors::InvalidReference) << "PUPPI value map is missing"; } + else { throw edm::Exception(edm::errors::InvalidReference) << "Puppi value map is missing"; } } else { throw edm::Exception(edm::errors::InvalidReference) << "Cannot convert to either pat::PackedCandidate or reco::PFCandidate"; } @@ -280,10 +263,7 @@ void DeepBoostedJetTagInfoProducer::fillParticleFeatures(DeepBoostedJetFeatures daughters.push_back(cand); } // sort by (Puppi-weighted) pt - if (update_jets_) { - // updating jet collection: - // linked daughters here are the original PackedCandidates - // need to scale the p4 with their puppi weights + if (!has_puppi_weighted_daughters_) { std::sort(daughters.begin(), daughters.end(), [&](const reco::CandidatePtr& a, const reco::CandidatePtr& b){ return puppiWgt(a)*a->pt() > puppiWgt(b)*b->pt(); }); }else{ std::sort(daughters.begin(), daughters.end(), [](const reco::CandidatePtr& a, const reco::CandidatePtr& b){ return a->pt() > b->pt(); }); @@ -300,10 +280,7 @@ void DeepBoostedJetTagInfoProducer::fillParticleFeatures(DeepBoostedJetFeatures auto puppiP4 = cand->p4(); if (packed_cand){ - if (update_jets_) { - // updating jet collection: - // linked daughters here are the original PackedCandidates - // need to scale the p4 with their puppi weights + if (!has_puppi_weighted_daughters_) { puppiP4 *= packed_cand->puppiWeight(); } @@ -362,10 +339,7 @@ void DeepBoostedJetTagInfoProducer::fillParticleFeatures(DeepBoostedJetFeatures } - // basic kinematics, valid for both charged and neutral - fts.fill("pfcand_pt", puppiP4.pt()); - fts.fill("pfcand_ptrel", puppiP4.pt()/jet.pt()); - fts.fill("pfcand_erel", puppiP4.energy()/jet.energy()); + // basic kinematics fts.fill("pfcand_phirel", reco::deltaPhi(puppiP4, jet)); fts.fill("pfcand_etarel", etasign * (puppiP4.eta() - jet.eta())); fts.fill("pfcand_deltaR", reco::deltaR(puppiP4, jet)); @@ -395,7 +369,7 @@ void DeepBoostedJetTagInfoProducer::fillParticleFeatures(DeepBoostedJetFeatures const reco::Track *trk = nullptr; if (packed_cand) { trk = packed_cand->bestTrack(); } - else if (useTrackProperties(reco_cand)) { trk= reco_cand->bestTrack(); } + else if (reco_cand && useTrackProperties(reco_cand)) { trk= reco_cand->bestTrack(); } if (trk){ fts.fill("pfcand_normchi2", catch_infs(std::floor(trk->normalizedChi2()))); @@ -414,12 +388,7 @@ void DeepBoostedJetTagInfoProducer::fillParticleFeatures(DeepBoostedJetFeatures TrackInfoBuilder trkinfo(track_builder); trkinfo.buildTrackInfo(&(*cand), jet_dir, jet_ref_track_dir, *pv_); - fts.fill("pfcand_btagMomentum", trkinfo.getTrackMomentum()); - fts.fill("pfcand_btagEta", trkinfo.getTrackEta()); fts.fill("pfcand_btagEtaRel", trkinfo.getTrackEtaRel()); - fts.fill("pfcand_btagPtRel", trkinfo.getTrackPtRel()); - fts.fill("pfcand_btagPPar", trkinfo.getTrackPPar()); - fts.fill("pfcand_btagDeltaR", trkinfo.getTrackDeltaR()); fts.fill("pfcand_btagPtRatio", trkinfo.getTrackPtRatio()); fts.fill("pfcand_btagPParRatio", trkinfo.getTrackPParRatio()); fts.fill("pfcand_btagSip2dVal", trkinfo.getTrackSip2dVal()); @@ -439,12 +408,7 @@ void DeepBoostedJetTagInfoProducer::fillParticleFeatures(DeepBoostedJetFeatures fts.fill("pfcand_dphidxy", 0); fts.fill("pfcand_dlambdadz", 0); - fts.fill("pfcand_btagMomentum", 0); - fts.fill("pfcand_btagEta", 0); fts.fill("pfcand_btagEtaRel", 0); - fts.fill("pfcand_btagPtRel", 0); - fts.fill("pfcand_btagPPar", 0); - fts.fill("pfcand_btagDeltaR", 0); fts.fill("pfcand_btagPtRatio", 0); fts.fill("pfcand_btagPParRatio", 0); fts.fill("pfcand_btagSip2dVal", 0); @@ -474,12 +438,9 @@ void DeepBoostedJetTagInfoProducer::fillSVFeatures(DeepBoostedJetFeatures &fts, for (const auto *sv : jetSVs){ // basic kinematics - fts.fill("sv_ptrel", sv->pt() / jet.pt()); - fts.fill("sv_erel", sv->energy() / jet.energy()); fts.fill("sv_phirel", reco::deltaPhi(*sv, jet)); fts.fill("sv_etarel", etasign * (sv->eta() - jet.eta())); fts.fill("sv_deltaR", reco::deltaR(*sv, jet)); - fts.fill("sv_pt", sv->pt()); fts.fill("sv_abseta", std::abs(sv->eta())); fts.fill("sv_mass", sv->mass()); @@ -489,19 +450,16 @@ void DeepBoostedJetTagInfoProducer::fillSVFeatures(DeepBoostedJetFeatures &fts, // sv properties fts.fill("sv_ntracks", sv->numberOfDaughters()); - fts.fill("sv_chi2", sv->vertexChi2()); - fts.fill("sv_ndf", sv->vertexNdof()); fts.fill("sv_normchi2", catch_infs(sv->vertexNormalizedChi2())); const auto &dxy = vertexDxy(*sv, *pv_); fts.fill("sv_dxy", dxy.value()); - fts.fill("sv_dxyerr", dxy.error()); fts.fill("sv_dxysig", dxy.significance()); const auto &d3d = vertexD3d(*sv, *pv_); fts.fill("sv_d3d", d3d.value()); - fts.fill("sv_d3derr", d3d.error()); fts.fill("sv_d3dsig", d3d.significance()); + fts.fill("sv_costhetasvpv", vertexDdotP(*sv, *pv_)); } diff --git a/RecoBTag/DeepBoostedJet/plugins/DeepBoostedJetTagsProducer.cc b/RecoBTag/DeepBoostedJet/plugins/DeepBoostedJetTagsProducer.cc index bd0121c7b521f..d4a393cc3e81d 100644 --- a/RecoBTag/DeepBoostedJet/plugins/DeepBoostedJetTagsProducer.cc +++ b/RecoBTag/DeepBoostedJet/plugins/DeepBoostedJetTagsProducer.cc @@ -18,15 +18,9 @@ #include #include -// TO BE IMPROVED -// Declaration of the data structure that is hold by the edm::GlobalCache. -// In TensorFlow, the computational graph is stored in a stateless graph object which can be shared -// by multiple session instances which handle the initialization of variables related to the graph. -// Following this approach in CMSSW, a graph should be stored in a GlobalCache which can be accessed -// by sessions owned by multiple stream module copies. Instead of using only the plain graph, we -// make use of a cache struct that can be extended in the future if nedded. In addition, the graph -// is protected via std::atomic, which should not affect the performance as it is only accessed in -// the module constructor and not in the actual produce loop. +// Currently hold the raw model/param data in the edm::GlobalCache. +// Can be improved by holding the mxnet Symbol/NDArrays in the cache +// when moving to the MXNet C++ API. struct MXBufferFileCache { MXBufferFileCache() : model_data(nullptr), param_data(nullptr) { } @@ -39,9 +33,8 @@ struct MXBufferFileCache { struct PreprocessParams { struct VarInfo { VarInfo() {} - VarInfo(float median, float upper) : center(median), scale(upper-median) { - if (scale==0) scale=1; - } + VarInfo(float median, float upper) : + center(median), scale(upper==median ? 1 : upper-median) {} float center = 0; float scale = 1; }; @@ -86,7 +79,7 @@ class DeepBoostedJetTagsProducer : public edm::stream::EDProducer src_; std::vector>> flav_pairs_; - std::vector input_names_; // names of each input group :: the ordering is important! + std::vector input_names_; // names of each input group - the ordering is important! std::vector> input_shapes_; // shapes of each input group std::unordered_map prep_info_map_; // preprocessing info for each input group @@ -112,7 +105,7 @@ DeepBoostedJetTagsProducer::DeepBoostedJetTagsProducer(const edm::ParameterSet& auto& prep_params = prep_info_map_[group_name]; prep_params.var_length = group_pset.getParameter("var_length"); prep_params.var_names = group_pset.getParameter>("var_names"); - auto &var_info_pset = group_pset.getParameterSet("var_infos"); + const auto &var_info_pset = group_pset.getParameterSet("var_infos"); for (const auto &var_name : prep_params.var_names){ const auto &var_pset = var_info_pset.getParameterSet(var_name); double median = var_pset.getParameter("median"); @@ -242,7 +235,7 @@ void DeepBoostedJetTagsProducer::produce(edm::Event& iEvent, const edm::EventSet for (unsigned jet_n=0; jet_nsize(); ++jet_n){ const auto& taginfo = tag_infos->at(jet_n); - std::vector outputs(flav_pairs_.size(), 0); + std::vector outputs(flav_pairs_.size(), 0); // init as all zeros if (!taginfo.features().empty()){ // convert inputs @@ -284,8 +277,8 @@ void DeepBoostedJetTagsProducer::produce(edm::Event& iEvent, const edm::EventSet std::vector DeepBoostedJetTagsProducer::center_norm_pad( const std::vector& input, float center, float scale, unsigned target_length, float pad_value, float min, float max) { - // do variable shifting/scaling/padding/clipping in one go + auto clip = [](float value, float low, float high){ if (low >= high) throw cms::Exception("InvalidArgument") << "Error in clip: low >= high!"; if (value < low) return low; @@ -307,12 +300,20 @@ void DeepBoostedJetTagsProducer::make_inputs(const reco::DeepBoostedJetTagInfo& data_.clear(); for (const auto &group_name : input_names_) { // initiate with an empty vector - data_.push_back({}); + data_.emplace_back(); auto &group_values = data_.back(); const auto& prep_params = prep_info_map_.at(group_name); // transform/pad + int var_ref_len = -1; for (const auto &varname : prep_params.var_names){ const auto &raw_value = taginfo.features().get(varname); + // check consistency of the variable length + if (var_ref_len == -1) { + var_ref_len = raw_value.size(); + } else { + if (static_cast(raw_value.size()) != var_ref_len) + throw cms::Exception("InvalidArgument") << "Inconsistent variable length " << raw_value.size() << " for " << varname << ", should be " << var_ref_len; + } const auto &info = prep_params.get_info(varname); float pad = 0; // pad w/ zero auto val = center_norm_pad(raw_value, info.center, info.scale, prep_params.var_length, pad, -5, 5);