Source code for higgs_dna.selections.jet_selections

from higgs_dna.selections.object_selections import delta_r_mask
from higgs_dna.tools.jetID import add_jetId
import awkward as ak
import correctionlib
import os
from coffea.analysis_tools import PackedSelection
import numpy as np
from correctionlib.highlevel import model_auto, open_auto
import json
import logging

logger = logging.getLogger(__name__)


[docs]def getBTagMVACut(mva_name, mva_wp, year): mva_name_to_btag_wp_name = { "particleNet": "particleNet_wp_values", "deepJet": "deepJet_wp_values", "robustParticleTransformer": "robustParticleTransformer_wp_values", "btagUParTAK4B": "UParTAK4_wp_values", } # Based on recommendations for the tight QCD WP seen here: # https://btv-wiki.docs.cern.ch/PerformanceCalibration/#working-points btag_correction_configs = { "2016preVFP": { "file": os.path.join( os.path.dirname(__file__), "..", "systematics", "JSONs", "bTagSF", "2016preVFP_UL/btagging.json.gz", ) }, "2016postVFP": { "file": os.path.join( os.path.dirname(__file__), "..", "systematics", "JSONs", "bTagSF", "2016postVFP_UL/btagging.json.gz", ) }, "2017": { "file": os.path.join( os.path.dirname(__file__), "..", "systematics", "JSONs", "bTagSF", "2017_UL/btagging.json.gz", ) }, "2018": { "file": os.path.join( os.path.dirname(__file__), "..", "systematics", "JSONs", "bTagSF", "2018_UL/btagging.json.gz", ) }, "2022preEE": { "file": os.path.join( os.path.dirname(__file__), "..", "systematics", "JSONs", "bTagSF", "2022_Summer22/btagging.json.gz", ) }, "2022postEE": { "file": os.path.join( os.path.dirname(__file__), "..", "systematics", "JSONs", "bTagSF", "2022_Summer22EE/btagging.json.gz", ) }, "2023preBPix": { "file": os.path.join( os.path.dirname(__file__), "..", "systematics", "JSONs", "bTagSF", "2023_Summer23/btagging.json.gz", ) }, "2023postBPix": { "file": os.path.join( os.path.dirname(__file__), "..", "systematics", "JSONs", "bTagSF", "2023_Summer23BPix/btagging.json.gz", ) }, "2024": { "file": os.path.join( os.path.dirname(__file__), "..", "systematics", "JSONs", "bTagSF", "2024_Summer24/btagging.json.gz", ) }, } avail_years = [ "2016preVFP", "2016postVFP", "2017", "2018", "2022preEE", "2022postEE", "2023preBPix", "2023postBPix", "2024", ] if year == "2025": year = "2024" logger.info( "BTV correctionlib for 2025 not available, using 2024 corrections instead." ) if year not in avail_years: logger.warning( f"\n BTV correctionlib for {year} not found! Don't cut on the selected B-Tag MVA. The b-related variables are most likely not correct.\n" ) return -999.0 cset = correctionlib.CorrectionSet.from_file( btag_correction_configs[year]["file"] ) btag_wp_name = mva_name_to_btag_wp_name[mva_name] if btag_wp_name not in set(cset.keys()): logger.warning( f"\n BTV correctionlib for {year} does not have the tagger {mva_name}! Don't cut on the selected B-Tag MVA. The b-related variables are most likely not correct.\n" ) return -999.0 mva_cut_value = cset[btag_wp_name].evaluate(mva_wp) return mva_cut_value
[docs]def select_jets( self, jets: ak.highlevel.Array, diphotons: ak.highlevel.Array, muons: ak.highlevel.Array, electrons: ak.highlevel.Array, taus: ak.highlevel.Array = None, ) -> ak.highlevel.Array: if self.jet_jetId == "tight": jetId_cut = jets.jetId >= 2 elif self.jet_jetId == "tightLepVeto": jetId_cut = jets.jetId == 6 else: jetId_cut = ak.ones_like(jets.pt) > 0 logger.warning("[ select_jets ] - No JetId applied") logger.debug( f"[ select_jets ] - Total: {ak.sum(ak.flatten((ak.ones_like(jets.pt) > 0)))} - Pass tight jetId: {ak.sum(ak.flatten(jetId_cut))}" ) pt_cut = jets.pt > self.jet_pt_threshold eta_cut = abs(jets.eta) < self.jet_max_eta dr_dipho_cut = ak.ones_like(pt_cut) > 0 if (self.clean_jet_dipho) & (ak.num(diphotons.pt, axis=0) > 0): dr_dipho_cut = delta_r_mask(jets, diphotons, self.jet_dipho_min_dr) if (self.clean_jet_pho) & (ak.num(diphotons.pt, axis=0) > 0): lead = ak.zip( { "pt": diphotons.pho_lead.pt, "eta": diphotons.pho_lead.eta, "phi": diphotons.pho_lead.phi, "mass": diphotons.pho_lead.mass, "charge": diphotons.pho_lead.charge, } ) lead = ak.with_name(lead, "PtEtaPhiMCandidate") sublead = ak.zip( { "pt": diphotons.pho_sublead.pt, "eta": diphotons.pho_sublead.eta, "phi": diphotons.pho_sublead.phi, "mass": diphotons.pho_sublead.mass, "charge": diphotons.pho_sublead.charge, } ) sublead = ak.with_name(sublead, "PtEtaPhiMCandidate") dr_pho_lead_cut = delta_r_mask(jets, lead, self.jet_pho_min_dr) dr_pho_sublead_cut = delta_r_mask(jets, sublead, self.jet_pho_min_dr) else: dr_pho_lead_cut = jets.pt > -1 dr_pho_sublead_cut = jets.pt > -1 if (self.clean_jet_ele) & (ak.num(electrons.pt, axis=0) > 0): dr_electrons_cut = delta_r_mask(jets, electrons, self.jet_ele_min_dr) else: dr_electrons_cut = jets.pt > -1 if (self.clean_jet_muo) & (ak.num(muons.pt, axis=0) > 0): dr_muons_cut = delta_r_mask(jets, muons, self.jet_muo_min_dr) else: dr_muons_cut = jets.pt > -1 if taus is not None: if (self.clean_jet_tau) & (ak.num(taus.pt, axis=0) > 0): dr_taus_cut = delta_r_mask(jets, taus, self.jet_tau_min_dr) else: dr_taus_cut = jets.pt > -1 else: dr_taus_cut = jets.pt > -1 return ( (jetId_cut) & (pt_cut) & (eta_cut) & (dr_dipho_cut) & (dr_pho_lead_cut) & (dr_pho_sublead_cut) & (dr_electrons_cut) & (dr_muons_cut) & (dr_taus_cut) )
[docs]def select_jets_eta_dependent( self, jets: ak.highlevel.Array, diphotons: ak.highlevel.Array, muons: ak.highlevel.Array, electrons: ak.highlevel.Array, taus: ak.highlevel.Array = None, ) -> ak.highlevel.Array: """ Selects jets with eta-dependent pT cuts, added as a temporary fix following https://gitlab.cern.ch/cms-jetmet/coordination/coordination/-/issues/113 Special args: jet_pt_thresholds (list): Minimum pT thresholds for each eta bin jet_eta_thresholds (list): Absolute eta thresholds for each bin of the selection Example usage: jet_pt_thresholds = [20, 50, 30] jet_eta_thresholds = [2.5, 3.0, 4.7] will select all jets that pass: 20 GeV for |eta| < 2.5 ; 50 GeV for 2.5<|eta|<3 ; 30 GeV for |eta|>3 """ jet_pt_thresholds = self.jet_pt_thresholds jet_eta_thresholds = self.jet_eta_thresholds assert len(jet_pt_thresholds) == len(jet_eta_thresholds), f"{jet_pt_thresholds} does not match {jet_eta_thresholds}" # Build the eta-dependent pT cut eta_pt_cut = ak.zeros_like(jets.pt, dtype=bool) for i, (pt_threshold, eta_threshold) in enumerate(zip(jet_pt_thresholds, jet_eta_thresholds)): # Handle first eta bin if i == 0: eta_pt_cut = eta_pt_cut | ((abs(jets.eta) < eta_threshold) & (jets.pt >= pt_threshold)) # Handle intermediate eta bins else: eta_pt_cut = eta_pt_cut | ((abs(jets.eta) >= jet_eta_thresholds[i - 1]) & (abs(jets.eta) < eta_threshold) & (jets.pt >= pt_threshold)) if self.jet_jetId == "tight": jetId_cut = jets.jetId >= 2 elif self.jet_jetId == "tightLepVeto": jetId_cut = jets.jetId == 6 else: jetId_cut = ak.ones_like(jets.pt) > 0 logger.warning("[ select_jets ] - No JetId applied") logger.debug( f"[ select_jets ] - Total: {ak.sum(ak.flatten((ak.ones_like(jets.pt) > 0)))} - Pass tight jetId: {ak.sum(ak.flatten(jetId_cut))}" ) eta_cut = abs(jets.eta) < self.jet_max_eta dr_dipho_cut = ak.ones_like(eta_pt_cut) > 0 if (self.clean_jet_dipho) & (ak.num(diphotons.pt, axis=0) > 0): dr_dipho_cut = delta_r_mask(jets, diphotons, self.jet_dipho_min_dr) if (self.clean_jet_pho) & (ak.num(diphotons.pt, axis=0) > 0): lead = ak.zip( { "pt": diphotons.pho_lead.pt, "eta": diphotons.pho_lead.eta, "phi": diphotons.pho_lead.phi, "mass": diphotons.pho_lead.mass, "charge": diphotons.pho_lead.charge, } ) lead = ak.with_name(lead, "PtEtaPhiMCandidate") sublead = ak.zip( { "pt": diphotons.pho_sublead.pt, "eta": diphotons.pho_sublead.eta, "phi": diphotons.pho_sublead.phi, "mass": diphotons.pho_sublead.mass, "charge": diphotons.pho_sublead.charge, } ) sublead = ak.with_name(sublead, "PtEtaPhiMCandidate") dr_pho_lead_cut = delta_r_mask(jets, lead, self.jet_pho_min_dr) dr_pho_sublead_cut = delta_r_mask(jets, sublead, self.jet_pho_min_dr) else: dr_pho_lead_cut = jets.pt > -1 dr_pho_sublead_cut = jets.pt > -1 if (self.clean_jet_ele) & (ak.num(electrons.pt, axis=0) > 0): dr_electrons_cut = delta_r_mask(jets, electrons, self.jet_ele_min_dr) else: dr_electrons_cut = jets.pt > -1 if (self.clean_jet_muo) & (ak.num(muons.pt, axis=0) > 0): dr_muons_cut = delta_r_mask(jets, muons, self.jet_muo_min_dr) else: dr_muons_cut = jets.pt > -1 if taus is not None: if (self.clean_jet_tau) & (ak.num(taus.pt, axis=0) > 0): dr_taus_cut = delta_r_mask(jets, taus, self.jet_tau_min_dr) else: dr_taus_cut = jets.pt > -1 else: dr_taus_cut = jets.pt > -1 return ( (jetId_cut) & (eta_pt_cut) & (eta_cut) & (dr_dipho_cut) & (dr_pho_lead_cut) & (dr_pho_sublead_cut) & (dr_electrons_cut) & (dr_muons_cut) & (dr_taus_cut) )
[docs]def select_fatjets( self, fatjets: ak.highlevel.Array, diphotons: ak.highlevel.Array, muons: ak.highlevel.Array, electrons: ak.highlevel.Array, ) -> ak.highlevel.Array: # same as select_jets(), but uses fatjet variables pt_cut = fatjets.pt > self.fatjet_pt_threshold eta_cut = abs(fatjets.eta) < self.fatjet_max_eta dr_dipho_cut = ak.ones_like(pt_cut) > 0 if self.clean_fatjet_dipho & (ak.num(diphotons.pt, axis=0) > 0): dr_dipho_cut = delta_r_mask(fatjets, diphotons, self.fatjet_dipho_min_dr) if (self.clean_fatjet_pho) & (ak.num(diphotons.pt, axis=0) > 0): lead = ak.zip( { "pt": diphotons.pho_lead.pt, "eta": diphotons.pho_lead.eta, "phi": diphotons.pho_lead.phi, "mass": diphotons.pho_lead.mass, "charge": diphotons.pho_lead.charge, } ) lead = ak.with_name(lead, "PtEtaPhiMCandidate") sublead = ak.zip( { "pt": diphotons.pho_sublead.pt, "eta": diphotons.pho_sublead.eta, "phi": diphotons.pho_sublead.phi, "mass": diphotons.pho_sublead.mass, "charge": diphotons.pho_sublead.charge, } ) sublead = ak.with_name(sublead, "PtEtaPhiMCandidate") dr_pho_lead_cut = delta_r_mask(fatjets, lead, self.fatjet_pho_min_dr) dr_pho_sublead_cut = delta_r_mask(fatjets, sublead, self.fatjet_pho_min_dr) else: dr_pho_lead_cut = fatjets.pt > -1 dr_pho_sublead_cut = fatjets.pt > -1 if (self.clean_fatjet_ele) & (ak.num(electrons.pt, axis=0) > 0): dr_electrons_cut = delta_r_mask(fatjets, electrons, self.fatjet_ele_min_dr) else: dr_electrons_cut = fatjets.pt > -1 if (self.clean_fatjet_muo) & (ak.num(muons.pt, axis=0) > 0): dr_muons_cut = delta_r_mask(fatjets, muons, self.fatjet_muo_min_dr) else: dr_muons_cut = fatjets.pt > -1 return ( (pt_cut) & (eta_cut) & (dr_dipho_cut) & (dr_pho_lead_cut) & (dr_pho_sublead_cut) & (dr_electrons_cut) & (dr_muons_cut) )
[docs]def jetvetomap(self, events, logger, dataset_name, year="2022preEE"): """ Jet veto map """ systematic = "jetvetomap" sel_obj = PackedSelection() json_dict = { "2016preVFP": os.path.join( os.path.dirname(__file__), "../systematics/JSONs/POG/JME/2016preVFP_UL/jetvetomaps.json.gz", ), "2016postVFP": os.path.join( os.path.dirname(__file__), "../systematics/JSONs/POG/JME/2016postVFP_UL/jetvetomaps.json.gz", ), "2017": os.path.join( os.path.dirname(__file__), "../systematics/JSONs/POG/JME/2017_UL/jetvetomaps.json.gz", ), "2018": os.path.join( os.path.dirname(__file__), "../systematics/JSONs/POG/JME/2018_UL/jetvetomaps.json.gz", ), "2022preEE": os.path.join( os.path.dirname(__file__), "../systematics/JSONs/POG/JME/2022_Summer22/jetvetomaps.json.gz", ), "2022postEE": os.path.join( os.path.dirname(__file__), "../systematics/JSONs/POG/JME/2022_Summer22EE/jetvetomaps.json.gz", ), "2023preBPix": os.path.join( os.path.dirname(__file__), "../systematics/JSONs/POG/JME/2023_Summer23/jetvetomaps.json.gz", ), "2023postBPix": os.path.join( os.path.dirname(__file__), "../systematics/JSONs/POG/JME/2023_Summer23BPix/jetvetomaps.json.gz", ), "2024": os.path.join( os.path.dirname(__file__), "../systematics/JSONs/POG/JME/2024_Summer24/jetvetomaps.json.gz", ), "2025": os.path.join( os.path.dirname(__file__), "../systematics/JSONs/POG/JME/2025_Winter25/jetvetomaps.json.gz", ), } key_map = { "2016preVFP": "Summer19UL16_V1", "2016postVFP": "Summer19UL16_V1", "2017": "Summer19UL17_V1", "2018": "Summer19UL18_V1", "2022preEE": "Summer22_23Sep2023_RunCD_V1", "2022postEE": "Summer22EE_23Sep2023_RunEFG_V1", "2023preBPix": "Summer23Prompt23_RunC_V1", "2023postBPix": "Summer23BPixPrompt23_RunD_V1", "2024": "Summer24Prompt24_RunBCDEFGHI_V1", "2025": "Winter25Prompt25_RunCDEFG_V1", } logger.debug( f"[{systematic}] {key_map[year]}, year: {year} to dataset: {dataset_name}" ) # Edge check of input variables. The eta and phi variables don't enable flow # https://cms-nanoaod-integration.web.cern.ch/commonJSONSFs/summaries/JME_2022_Prompt_jetvetomaps.html _cset = model_auto(open_auto(json_dict[year])) _cset_json = json.loads(_cset.json()) low_eta, high_eta = ( _cset_json["corrections"][0]["data"]["content"][0]["value"]["edges"][0][0], _cset_json["corrections"][0]["data"]["content"][0]["value"]["edges"][0][-1], ) # phi value must be within [-np.pi,np.pi]. Though values beyond are observed. # Might due to the accuracy of nanoaod format. So clip the values to be # within the first and last bin centers low_phi, high_phi = ( ( _cset_json["corrections"][0]["data"]["content"][0]["value"]["edges"][1][0] + _cset_json["corrections"][0]["data"]["content"][0]["value"]["edges"][1][1] ) / 2, ( _cset_json["corrections"][0]["data"]["content"][0]["value"]["edges"][1][-1] + _cset_json["corrections"][0]["data"]["content"][0]["value"]["edges"][1][ -2 ] ) / 2, ) jets_jagged = events.Jet # remove jets out of bin edges # https://cms-nanoaod-integration.web.cern.ch/commonJSONSFs/summaries/JME_2022_Prompt_jetvetomaps.html jets_jagged = jets_jagged[ (jets_jagged.eta >= low_eta) & (jets_jagged.eta < high_eta) ] count = ak.num(jets_jagged) jets = ak.flatten(jets_jagged) cset = correctionlib.CorrectionSet.from_file(json_dict[year]) # ref: https://twiki.cern.ch/twiki/bin/viewauth/CMS/PdmVRun3Analysis#From_JME # and: https://cms-talk.web.cern.ch/t/jet-veto-maps-for-run3/57850/6 input_dict = { "type": "jetvetomap", "eta": jets.eta, "phi": np.clip(jets.phi, low_phi, high_phi), } # recompute jetId before vetomap jets["jetId"] = add_jetId(jets, self.nano_version, year) run2_years = ["2016preVFP", "2016postVFP", "2017", "2018"] if year in run2_years: # Tight ID requirement jetId_cut = ((jets.jetId == 2) | (jets.jetId == 6)) else: # tightLepVeto requirement jetId_cut = (jets.jetId == 6) input_dict["type"] = "jetvetomap" inputs = [input_dict[input.name] for input in cset[key_map[year]].inputs] vetomap = cset[key_map[year]].evaluate(*(inputs)) flag_veto_jet = (np.abs(vetomap) > 0) & ( (jets.pt > 15) & (jetId_cut) & ((jets.chEmEF + jets.neEmEF) < 0.9) ) if year in run2_years: flag_veto_jet = flag_veto_jet & ( (jets.muonIdx1 == -1) & (jets.muonIdx2 == -1) ) sel_obj.add("vetomap", flag_veto_jet) sel_veto_jet = sel_obj.all(*(sel_obj.names)) sel_good_jet = ~ak.Array(sel_veto_jet) logger.debug( f"[{systematic}] total jets: {len(sel_good_jet)}, pass jets: {ak.sum(sel_good_jet)}" ) sel_good_jet_jagged = ak.unflatten(sel_good_jet, count) flag_veto_jet_jagged = ak.unflatten(flag_veto_jet, count) sel_event_veto = ~ak.any(flag_veto_jet_jagged, axis=1) # Apply the veto mask, preserving all fields if year in run2_years: # Run2: veto only jets filtered_events = events filtered_events["Photon"] = events.Photon filtered_events["Jet"] = jets_jagged[sel_good_jet_jagged] # * Need to add Muon varibles for muon s&s uncertainties filtered_events["Muon"] = events.Muon else: # Run3: veto entire event if ANY jet is bad filtered_events = events[sel_event_veto] filtered_events["Photon"] = events.Photon[sel_event_veto] filtered_events["Jet"] = (jets_jagged[sel_good_jet_jagged])[sel_event_veto] # * Need to add Muon varibles for muon s&s uncertainties filtered_events["Muon"] = events.Muon[sel_event_veto] logger.debug( f"[{systematic}] total event: {len(sel_event_veto)}, pass event: {len(filtered_events)}" ) return filtered_events