Skip to content

Commit

Permalink
Some improvements and clean-ups.
Browse files Browse the repository at this point in the history
  • Loading branch information
hqucms committed Sep 11, 2018
1 parent d7a1c26 commit d0e4e1a
Show file tree
Hide file tree
Showing 5 changed files with 56 additions and 91 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -44,8 +44,8 @@ def applyDeepBtagging( process, postfix="" ) :



# update slimmed jets to include DeepFlavour (keep same name)
# make clone for DeepDoubleB-less slimmed AK8 jets, so output name is preserved
# update slimmed jets to include particle-based deep taggers (keep same name)
# make clone for DeepTags-less slimmed AK8 jets, so output name is preserved
addToProcessAndTask('slimmedJetsAK8NoDeepTags', process.slimmedJetsAK8.clone(), process, task)
updateJetCollection(
process,
Expand Down Expand Up @@ -114,7 +114,7 @@ def applyDeepBtagging( process, postfix="" ) :
printWarning = False
)

# slimmedJetsAK8 with DeepDoubleB (remove DeepDoubleB-less)
# slimmedJetsAK8 with DeepTags (remove DeepTags-less)
delattr(process, 'slimmedJetsAK8')
addToProcessAndTask('slimmedJetsAK8', getattr(process,'selectedUpdatedPatJetsSlimmedAK8DeepTags'+postfix).clone(), process, task)
# delete module not used anymore (slimmedJetsAK8 substitutes)
Expand Down
20 changes: 14 additions & 6 deletions PhysicsTools/PatAlgos/python/tools/jetTools.py
Original file line number Diff line number Diff line change
Expand Up @@ -633,21 +633,29 @@ def setupBTagging(process, jetSource, pfCandidates, explicitJTA, pvSource, svSou
if btagInfo == 'pfDeepBoostedJetTagInfos':
jetSrcName = jetSource.value().lower()
if 'slimmed' in jetSrcName or 'updated' in jetSrcName:
# running on slimmedJetsAK8 or re-applying btag on MiniAOD
update_jets = cms.bool(True)
# case 1: update jets whose daughters are PackedCandidates, e.g., slimmedJetsAK8
# daughters are links to original PackedCandidates, so NOT scaled by their puppi weights
has_puppi_weighted_daughters = cms.bool(False)
puppi_value_map = cms.InputTag("")
vertex_associator = cms.InputTag("")
else:
sys.stderr.write("Warning: running pfDeepBoostedJetTagInfos on %s is not supported yet.\n"%jetSource)
update_jets = cms.bool(False)
puppi_value_map = cms.InputTag("puppi")
vertex_associator = cms.InputTag("primaryVertexAssociation","original")
has_puppi_weighted_daughters = cms.bool(True)
# daughters are the particles used in jet clustering, so already scaled by their puppi weights
if pfCandidates.value() == 'packedPFCandidates':
# case 2: running on new jet collection whose daughters are PackedCandidates (e.g., recluster jets from MiniAOD files)
puppi_value_map = cms.InputTag("")
vertex_associator = cms.InputTag("")
elif pfCandidates.value() == 'particleFlow':
# case 3: running on new jet collection whose daughters are PFCandidates (e.g., cluster jets in RECO/AOD)
puppi_value_map = cms.InputTag("puppi")
vertex_associator = cms.InputTag("primaryVertexAssociation","original")
addToProcessAndTask(btagPrefix+btagInfo+labelName+postfix,
btag.pfDeepBoostedJetTagInfos.clone(
jets = jetSource,
vertices = pvSource,
secondary_vertices = svSource,
update_jets = update_jets,
has_puppi_weighted_daughters = has_puppi_weighted_daughters,
puppi_value_map = puppi_value_map,
vertex_associator = vertex_associator,
),
Expand Down
2 changes: 0 additions & 2 deletions RecoBTag/DeepBoostedJet/plugins/BuildFile.xml
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,8 @@
<library file="*.cc" name="RecoBTagDeepBoostedJetPlugins">
<use name="DataFormats/BTauReco"/>
<use name="DataFormats/Common"/>
<use name="RecoBTag/Combined"/>
<use name="RecoBTag/TensorFlow"/>
<use name="DataFormats/PatCandidates"/>
<use name="TrackingTools/IPTools"/>
<use name="PhysicsTools/MXNet"/>
<flags EDM_PLUGIN="1"/>
</library>
84 changes: 21 additions & 63 deletions RecoBTag/DeepBoostedJet/plugins/DeepBoostedJetTagInfoProducer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ class DeepBoostedJetTagInfoProducer : public edm::stream::EDProducer<>
void fillParticleFeatures(DeepBoostedJetFeatures &fts, const reco::Jet &jet);
void fillSVFeatures(DeepBoostedJetFeatures &fts, const reco::Jet &jet);

const bool update_jets_;
const bool has_puppi_weighted_daughters_;
const double jet_radius_;
const double min_jet_pt_;
const double min_pt_for_track_properties_;
Expand Down Expand Up @@ -72,10 +72,10 @@ class DeepBoostedJetTagInfoProducer : public edm::stream::EDProducer<>
};

DeepBoostedJetTagInfoProducer::DeepBoostedJetTagInfoProducer(const edm::ParameterSet& iConfig)
: update_jets_(iConfig.getParameter<bool>("update_jets"))
: has_puppi_weighted_daughters_(iConfig.getParameter<bool>("has_puppi_weighted_daughters"))
, jet_radius_(iConfig.getParameter<double>("jet_radius"))
, min_jet_pt_(iConfig.getParameter<double>("min_jet_pt"))
, min_pt_for_track_properties_(iConfig.getParameter<double>("minPtForTrackProperties"))
, min_pt_for_track_properties_(iConfig.getParameter<double>("min_pt_for_track_properties"))
, jet_token_(consumes<edm::View<reco::Jet>>(iConfig.getParameter<edm::InputTag>("jets")))
, vtx_token_(consumes<VertexCollection>(iConfig.getParameter<edm::InputTag>("vertices")))
, sv_token_(consumes<SVCollection>(iConfig.getParameter<edm::InputTag>("secondary_vertices")))
Expand Down Expand Up @@ -109,16 +109,13 @@ void DeepBoostedJetTagInfoProducer::fillDescriptions(edm::ConfigurationDescripti
{
// pfDeepBoostedJetTagInfos
edm::ParameterSetDescription desc;
// update_jets:
// set to true if applying on existing jet collections (whose daughters are *not* puppi weighted)
// set to false if the jet collection is (re)clustered (whose daughters are puppi weighted)
desc.add<bool>("update_jets", true);
desc.add<bool>("has_puppi_weighted_daughters", true);
desc.add<double>("jet_radius", 0.8);
desc.add<double>("min_jet_pt", 170);
desc.add<double>("minPtForTrackProperties", -1);
desc.add<double>("min_jet_pt", 150);
desc.add<double>("min_pt_for_track_properties", -1);
desc.add<edm::InputTag>("vertices", edm::InputTag("offlinePrimaryVertices"));
desc.add<edm::InputTag>("secondary_vertices", edm::InputTag("inclusiveCandidateSecondaryVertices"));
desc.add<edm::InputTag>("jets", edm::InputTag("slimmedJetsAK8"));
desc.add<edm::InputTag>("jets", edm::InputTag("ak8PFJetsPuppi"));
desc.add<edm::InputTag>("puppi_value_map", edm::InputTag("puppi"));
desc.add<edm::InputTag>("vertex_associator", edm::InputTag("primaryVertexAssociation","original"));
desc.add<std::vector<std::string>>("feature_names", std::vector<std::string>{
Expand All @@ -133,9 +130,6 @@ void DeepBoostedJetTagInfoProducer::fillDescriptions(edm::ConfigurationDescripti
"pfcand_isChargedHad",
"pfcand_isGamma",
"pfcand_isNeutralHad",
"pfcand_pt",
"pfcand_ptrel",
"pfcand_erel",
"pfcand_phirel",
"pfcand_etarel",
"pfcand_deltaR",
Expand All @@ -159,39 +153,27 @@ void DeepBoostedJetTagInfoProducer::fillDescriptions(edm::ConfigurationDescripti
"pfcand_dxydz",
"pfcand_dphidxy",
"pfcand_dlambdadz",
"pfcand_btagMomentum",
"pfcand_btagEta",
"pfcand_btagEtaRel",
"pfcand_btagPtRel",
"pfcand_btagPPar",
"pfcand_btagDeltaR",
"pfcand_btagPtRatio",
"pfcand_btagPParRatio",
"pfcand_btagSip2dVal",
"pfcand_btagSip2dSig",
"pfcand_btagSip3dVal",
"pfcand_btagSip3dSig",
"pfcand_btagJetDistVal",
"sv_ptrel",
"sv_erel",
"sv_phirel",
"sv_etarel",
"sv_deltaR",
"sv_pt",
"sv_abseta",
"sv_mass",
"sv_ptrel_log",
"sv_erel_log",
"sv_pt_log",
"sv_ntracks",
"sv_chi2",
"sv_ndf",
"sv_normchi2",
"sv_dxy",
"sv_dxyerr",
"sv_dxysig",
"sv_d3d",
"sv_d3derr",
"sv_d3dsig",
"sv_costhetasvpv",
});
Expand Down Expand Up @@ -230,19 +212,20 @@ void DeepBoostedJetTagInfoProducer::produce(edm::Event& iEvent, const edm::Event

for (std::size_t jet_n = 0; jet_n < jets->size(); jet_n++){

// reco jet reference (use as much as possible)
const auto& jet = jets->at(jet_n);
edm::RefToBase<reco::Jet> jet_ref(jets, jet_n);

// create jet features
DeepBoostedJetFeatures features;
// declare all the feature variables (init as empty vector)
for (const auto &name : feature_names_) features.add(name);
// fill only if above pt threshold
if (jet.pt() > min_jet_pt_){
fillParticleFeatures(features, jet);
fillSVFeatures(features, jet);
}
for (const auto &name : feature_names_) { features.add(name); }

// fill values only if above pt threshold and has daughters, otherwise left empty
if (jet.pt() < min_jet_pt_) continue;
if (jet.numberOfDaughters() == 0) continue;

fillParticleFeatures(features, jet);
fillSVFeatures(features, jet);

output_tag_infos->emplace_back(features, jet_ref);
}
Expand All @@ -267,7 +250,7 @@ void DeepBoostedJetTagInfoProducer::fillParticleFeatures(DeepBoostedJetFeatures
return pack_cand->puppiWeight();
} else if (reco_cand) {
if (use_puppi_value_map_){ return (*puppi_value_map)[cand]; }
else { throw edm::Exception(edm::errors::InvalidReference) << "PUPPI value map is missing"; }
else { throw edm::Exception(edm::errors::InvalidReference) << "Puppi value map is missing"; }
} else {
throw edm::Exception(edm::errors::InvalidReference) << "Cannot convert to either pat::PackedCandidate or reco::PFCandidate";
}
Expand All @@ -280,10 +263,7 @@ void DeepBoostedJetTagInfoProducer::fillParticleFeatures(DeepBoostedJetFeatures
daughters.push_back(cand);
}
// sort by (Puppi-weighted) pt
if (update_jets_) {
// updating jet collection:
// linked daughters here are the original PackedCandidates
// need to scale the p4 with their puppi weights
if (!has_puppi_weighted_daughters_) {
std::sort(daughters.begin(), daughters.end(), [&](const reco::CandidatePtr& a, const reco::CandidatePtr& b){ return puppiWgt(a)*a->pt() > puppiWgt(b)*b->pt(); });
}else{
std::sort(daughters.begin(), daughters.end(), [](const reco::CandidatePtr& a, const reco::CandidatePtr& b){ return a->pt() > b->pt(); });
Expand All @@ -300,10 +280,7 @@ void DeepBoostedJetTagInfoProducer::fillParticleFeatures(DeepBoostedJetFeatures

auto puppiP4 = cand->p4();
if (packed_cand){
if (update_jets_) {
// updating jet collection:
// linked daughters here are the original PackedCandidates
// need to scale the p4 with their puppi weights
if (!has_puppi_weighted_daughters_) {
puppiP4 *= packed_cand->puppiWeight();
}

Expand Down Expand Up @@ -362,10 +339,7 @@ void DeepBoostedJetTagInfoProducer::fillParticleFeatures(DeepBoostedJetFeatures

}

// basic kinematics, valid for both charged and neutral
fts.fill("pfcand_pt", puppiP4.pt());
fts.fill("pfcand_ptrel", puppiP4.pt()/jet.pt());
fts.fill("pfcand_erel", puppiP4.energy()/jet.energy());
// basic kinematics
fts.fill("pfcand_phirel", reco::deltaPhi(puppiP4, jet));
fts.fill("pfcand_etarel", etasign * (puppiP4.eta() - jet.eta()));
fts.fill("pfcand_deltaR", reco::deltaR(puppiP4, jet));
Expand Down Expand Up @@ -395,7 +369,7 @@ void DeepBoostedJetTagInfoProducer::fillParticleFeatures(DeepBoostedJetFeatures

const reco::Track *trk = nullptr;
if (packed_cand) { trk = packed_cand->bestTrack(); }
else if (useTrackProperties(reco_cand)) { trk= reco_cand->bestTrack(); }
else if (reco_cand && useTrackProperties(reco_cand)) { trk= reco_cand->bestTrack(); }
if (trk){
fts.fill("pfcand_normchi2", catch_infs(std::floor(trk->normalizedChi2())));

Expand All @@ -414,12 +388,7 @@ void DeepBoostedJetTagInfoProducer::fillParticleFeatures(DeepBoostedJetFeatures

TrackInfoBuilder trkinfo(track_builder);
trkinfo.buildTrackInfo(&(*cand), jet_dir, jet_ref_track_dir, *pv_);
fts.fill("pfcand_btagMomentum", trkinfo.getTrackMomentum());
fts.fill("pfcand_btagEta", trkinfo.getTrackEta());
fts.fill("pfcand_btagEtaRel", trkinfo.getTrackEtaRel());
fts.fill("pfcand_btagPtRel", trkinfo.getTrackPtRel());
fts.fill("pfcand_btagPPar", trkinfo.getTrackPPar());
fts.fill("pfcand_btagDeltaR", trkinfo.getTrackDeltaR());
fts.fill("pfcand_btagPtRatio", trkinfo.getTrackPtRatio());
fts.fill("pfcand_btagPParRatio", trkinfo.getTrackPParRatio());
fts.fill("pfcand_btagSip2dVal", trkinfo.getTrackSip2dVal());
Expand All @@ -439,12 +408,7 @@ void DeepBoostedJetTagInfoProducer::fillParticleFeatures(DeepBoostedJetFeatures
fts.fill("pfcand_dphidxy", 0);
fts.fill("pfcand_dlambdadz", 0);

fts.fill("pfcand_btagMomentum", 0);
fts.fill("pfcand_btagEta", 0);
fts.fill("pfcand_btagEtaRel", 0);
fts.fill("pfcand_btagPtRel", 0);
fts.fill("pfcand_btagPPar", 0);
fts.fill("pfcand_btagDeltaR", 0);
fts.fill("pfcand_btagPtRatio", 0);
fts.fill("pfcand_btagPParRatio", 0);
fts.fill("pfcand_btagSip2dVal", 0);
Expand Down Expand Up @@ -474,12 +438,9 @@ void DeepBoostedJetTagInfoProducer::fillSVFeatures(DeepBoostedJetFeatures &fts,

for (const auto *sv : jetSVs){
// basic kinematics
fts.fill("sv_ptrel", sv->pt() / jet.pt());
fts.fill("sv_erel", sv->energy() / jet.energy());
fts.fill("sv_phirel", reco::deltaPhi(*sv, jet));
fts.fill("sv_etarel", etasign * (sv->eta() - jet.eta()));
fts.fill("sv_deltaR", reco::deltaR(*sv, jet));
fts.fill("sv_pt", sv->pt());
fts.fill("sv_abseta", std::abs(sv->eta()));
fts.fill("sv_mass", sv->mass());

Expand All @@ -489,19 +450,16 @@ void DeepBoostedJetTagInfoProducer::fillSVFeatures(DeepBoostedJetFeatures &fts,

// sv properties
fts.fill("sv_ntracks", sv->numberOfDaughters());
fts.fill("sv_chi2", sv->vertexChi2());
fts.fill("sv_ndf", sv->vertexNdof());
fts.fill("sv_normchi2", catch_infs(sv->vertexNormalizedChi2()));

const auto &dxy = vertexDxy(*sv, *pv_);
fts.fill("sv_dxy", dxy.value());
fts.fill("sv_dxyerr", dxy.error());
fts.fill("sv_dxysig", dxy.significance());

const auto &d3d = vertexD3d(*sv, *pv_);
fts.fill("sv_d3d", d3d.value());
fts.fill("sv_d3derr", d3d.error());
fts.fill("sv_d3dsig", d3d.significance());

fts.fill("sv_costhetasvpv", vertexDdotP(*sv, *pv_));
}

Expand Down
35 changes: 18 additions & 17 deletions RecoBTag/DeepBoostedJet/plugins/DeepBoostedJetTagsProducer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -18,15 +18,9 @@
#include <iostream>
#include <fstream>

// TO BE IMPROVED
// Declaration of the data structure that is held by the edm::GlobalCache.
// In TensorFlow, the computational graph is stored in a stateless graph object which can be shared
// by multiple session instances which handle the initialization of variables related to the graph.
// Following this approach in CMSSW, a graph should be stored in a GlobalCache which can be accessed
// by sessions owned by multiple stream module copies. Instead of using only the plain graph, we
// make use of a cache struct that can be extended in the future if needed. In addition, the graph
// is protected via std::atomic, which should not affect the performance as it is only accessed in
// the module constructor and not in the actual produce loop.
// Currently holds the raw model/param data in the edm::GlobalCache.
// Can be improved by holding the mxnet Symbol/NDArrays in the cache
// when moving to the MXNet C++ API.
struct MXBufferFileCache {
MXBufferFileCache() : model_data(nullptr), param_data(nullptr) {
}
Expand All @@ -39,9 +33,8 @@ struct MXBufferFileCache {
struct PreprocessParams {
struct VarInfo {
VarInfo() {}
VarInfo(float median, float upper) : center(median), scale(upper-median) {
if (scale==0) scale=1;
}
VarInfo(float median, float upper) :
center(median), scale(upper==median ? 1 : upper-median) {}
float center = 0;
float scale = 1;
};
Expand Down Expand Up @@ -86,7 +79,7 @@ class DeepBoostedJetTagsProducer : public edm::stream::EDProducer<edm::GlobalCac

const edm::EDGetTokenT< TagInfoCollection > src_;
std::vector<std::pair<std::string,std::vector<unsigned int>>> flav_pairs_;
std::vector<std::string> input_names_; // names of each input group :: the ordering is important!
std::vector<std::string> input_names_; // names of each input group - the ordering is important!
std::vector<std::vector<unsigned int>> input_shapes_; // shapes of each input group
std::unordered_map<std::string, PreprocessParams> prep_info_map_; // preprocessing info for each input group

Expand All @@ -112,7 +105,7 @@ DeepBoostedJetTagsProducer::DeepBoostedJetTagsProducer(const edm::ParameterSet&
auto& prep_params = prep_info_map_[group_name];
prep_params.var_length = group_pset.getParameter<unsigned>("var_length");
prep_params.var_names = group_pset.getParameter<std::vector<std::string>>("var_names");
auto &var_info_pset = group_pset.getParameterSet("var_infos");
const auto &var_info_pset = group_pset.getParameterSet("var_infos");
for (const auto &var_name : prep_params.var_names){
const auto &var_pset = var_info_pset.getParameterSet(var_name);
double median = var_pset.getParameter<double>("median");
Expand Down Expand Up @@ -242,7 +235,7 @@ void DeepBoostedJetTagsProducer::produce(edm::Event& iEvent, const edm::EventSet
for (unsigned jet_n=0; jet_n<tag_infos->size(); ++jet_n){

const auto& taginfo = tag_infos->at(jet_n);
std::vector<float> outputs(flav_pairs_.size(), 0);
std::vector<float> outputs(flav_pairs_.size(), 0); // init as all zeros

if (!taginfo.features().empty()){
// convert inputs
Expand Down Expand Up @@ -284,8 +277,8 @@ void DeepBoostedJetTagsProducer::produce(edm::Event& iEvent, const edm::EventSet
std::vector<float> DeepBoostedJetTagsProducer::center_norm_pad(
const std::vector<float>& input, float center, float scale,
unsigned target_length, float pad_value, float min, float max) {

// do variable shifting/scaling/padding/clipping in one go

auto clip = [](float value, float low, float high){
if (low >= high) throw cms::Exception("InvalidArgument") << "Error in clip: low >= high!";
if (value < low) return low;
Expand All @@ -307,12 +300,20 @@ void DeepBoostedJetTagsProducer::make_inputs(const reco::DeepBoostedJetTagInfo&
data_.clear();
for (const auto &group_name : input_names_) {
// initialize with an empty vector
data_.push_back({});
data_.emplace_back();
auto &group_values = data_.back();
const auto& prep_params = prep_info_map_.at(group_name);
// transform/pad
int var_ref_len = -1;
for (const auto &varname : prep_params.var_names){
const auto &raw_value = taginfo.features().get(varname);
// check consistency of the variable length
if (var_ref_len == -1) {
var_ref_len = raw_value.size();
} else {
if (static_cast<int>(raw_value.size()) != var_ref_len)
throw cms::Exception("InvalidArgument") << "Inconsistent variable length " << raw_value.size() << " for " << varname << ", should be " << var_ref_len;
}
const auto &info = prep_params.get_info(varname);
float pad = 0; // pad w/ zero
auto val = center_norm_pad(raw_value, info.center, info.scale, prep_params.var_length, pad, -5, 5);
Expand Down

0 comments on commit d0e4e1a

Please sign in to comment.