Basic Example

This short notebook shows how to get started with HiggsDNA and Coffea.

[1]:

from higgs_dna.utils.logger_utils import setup_logger
from higgs_dna.workflows import DYStudiesProcessor

from coffea import processor
import json
from importlib import resources

[6]:

# Fileset mapping: dataset name -> list of input ROOT files for that dataset.
# The file path is relative; presumably it is resolved against the kernel's
# working directory, so start the notebook from a directory containing `samples/`.
dy_dataset_name = "DYJetsToLL_M-50_TuneCP5_13TeV-amcatnloFXFX-pythia8"
dy_input_files = ["samples/skimmed_nano/DYJetsToLL--UL17_v6-v2_109_Skim.root"]
fileset = {dy_dataset_name: dy_input_files}

[7]:

# Load the metaconditions JSON that ships as a package resource inside
# `higgs_dna.metaconditions`, and parse it into a plain Python object.
with resources.open_text(
    "higgs_dna.metaconditions", "Era2017_legacy_xgb_v1.json"
) as metaconditions_file:
    metaconditions = json.loads(metaconditions_file.read())

[8]:

# Keyword arguments for the workflow processor, collected in one place so they
# are easy to tweak.
# NOTE(review): the exact meaning of each flag is defined by DYStudiesProcessor,
# which is not visible in this notebook -- check the workflow's documentation.
processor_options = dict(
    metaconditions=metaconditions,
    do_systematics=False,
    apply_trigger=True,
    output_location="output/basics",
)
processor_instance = DYStudiesProcessor(**processor_options)

[9]:

# Build a coffea Runner around the iterative executor (constructed with
# compression=None) and have it read the inputs with the NanoAOD schema.
iterative_executor = processor.IterativeExecutor(compression=None)
iterative_run = processor.Runner(
    executor=iterative_executor,
    schema=processor.NanoAODSchema,
)

# Process the "Events" tree of every file in `fileset` with the configured
# processor; whatever the run returns is kept in `out`.
out = iterative_run(fileset, treename="Events", processor_instance=processor_instance)

/work/gallim/devel/HiggsDNA/higgs_dna/workflows/base.py:216: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead.  To get a de-fragmented frame, use `newframe = frame.copy()`
  output[f"{prefix}_{subfield}"] = awkward.to_numpy(
/work/gallim/devel/HiggsDNA/higgs_dna/workflows/base.py:220: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead.  To get a de-fragmented frame, use `newframe = frame.copy()`
  output[field] = awkward.to_numpy(diphotons[field])