Source code for higgs_dna.tools.photonid_mva

import warnings
from typing import List, Optional, Tuple

import awkward as ak
import numpy
import xgboost


[docs]def load_photonid_mva(fname: str) -> Optional[xgboost.Booster]: try: photonid_mva = xgboost.Booster() photonid_mva.load_model(fname) except (xgboost.core.XGBoostError, FileNotFoundError) as e: warnings.warn( f"Could not load photonid MVA model from '{fname}': {e}. Skipping.", stacklevel=2 ) photonid_mva = None return photonid_mva
[docs]def calculate_photonid_mva( mva: Tuple[Optional[xgboost.Booster], List[str]], photon: ak.Array, ) -> ak.Array: """Recompute PhotonIDMVA on-the-fly. This step is necessary considering that the inputs have to be corrected with the QRC process. Following is the list of features (barrel has 12, endcap two more): EB: events.Photon.energyRaw events.Photon.r9 events.Photon.sieie events.Photon.etaWidth events.Photon.phiWidth events.Photon.sieip events.Photon.s4 events.Photon.pfPhoIso03 events.Photon.pfChargedIsoPFPV events.Photon.pfChargedIsoWorstVtx events.Photon.eta events.fixedGridRhoAll EE: EB + events.Photon.esEffSigmaRR events.Photon.esEnergyOverRawE """ photonid_mva, var_order = mva if photonid_mva is None: return ak.ones_like(photon.pt) bdt_inputs = {} bdt_inputs = numpy.column_stack( [ak.to_numpy(photon[name]) for name in var_order] ) tempmatrix = xgboost.DMatrix(bdt_inputs, feature_names=var_order) mvaID = photonid_mva.predict(tempmatrix) # Only needed to compare to TMVA # mvaID = 1.0 - 2.0 / (1.0 + numpy.exp(2.0 * mvaID)) # the previous transformation was not working correctly, peakin at about 0.7 # since we can't really remember why that functional form was picked in the first place we decided # to switch to a simpler stretch of the output that works better, even though not perfectly. # Open for changes/ideas mvaID = -1 + 2 * mvaID return mvaID
[docs]def load_photonid_mva_run3(fname: str) -> Optional[xgboost.Booster]: """ Reads and returns both the EB and EE Xgboost run3 mvaID models """ photonid_mva_EB = xgboost.Booster() photonid_mva_EB.load_model(fname + 'Egamma_Run3_photonIDMVA_EB.json') photonid_mva_EE = xgboost.Booster() photonid_mva_EE.load_model(fname + 'Egamma_Run3_photonIDMVA_EE.json') return photonid_mva_EB, photonid_mva_EE
[docs]def calculate_photonid_mva_run3( mva: Tuple[Optional[xgboost.Booster], List[str]], photon: ak.Array, rho, ) -> ak.Array: """Recompute PhotonIDMVA on-the-fly. This step is necessary considering that the inputs have to be corrected with the QRC process. Following is the list of features (barrel has 12, endcap two more): EB: events.Photon.energyRaw events.Photon.r9 events.Photon.sieie events.Photon.etaWidth events.Photon.phiWidth events.Photon.sieip events.Photon.s4 events.photon.hoe probe_ecalPFClusterIso probe_trkSumPtHollowConeDR03 probe_trkSumPtSolidConeDR04 probe_pfChargedIso probe_pfChargedIsoWorstVtx events.Photon.ScEta events.fixedGridRhoAll EE: + events.Photon.energyRaw events.Photon.r9 events.Photon.sieie events.Photon.etaWidth events.Photon.phiWidth events.Photon.sieip events.Photon.s4 events.photon.hoe probe_ecalPFClusterIso probe_hcalPFClusterIso probe_trkSumPtHollowConeDR03 probe_trkSumPtSolidConeDR04 probe_pfChargedIso probe_pfChargedIsoWorstVtx events.Photon.ScEta events.fixedGridRhoAll events.Photon.esEffSigmaRR events.Photon.esEnergyOverRawE """ photon["fixedGridRhoAll"] = numpy.nan_to_num(rho) photon = numpy.nan_to_num(photon) photonid_mva, var_order = mva if photonid_mva is None: return ak.ones_like(photon.pt) bdt_inputs = {} bdt_inputs = numpy.column_stack( [ak.to_numpy(photon[name]) for name in var_order] ) tempmatrix = xgboost.DMatrix(bdt_inputs) mvaID = photonid_mva.predict(tempmatrix) # Only needed to compare to TMVA mvaID = 1.0 - 2.0 / (1.0 + numpy.exp(2.0 * mvaID)) return mvaID