Source code for higgs_dna.scripts.plotting.plotting_utils

# Hist to plot sigma_m
import os 
import awkward as ak
import numpy as np
import matplotlib.pyplot as plt 
import mplhep, hist
plt.style.use([mplhep.style.CMS])
import pandas as pd
import scipy
import mplhep as hep
import glob
import warnings

import json

warnings.filterwarnings("ignore")

[docs]def plotter( mc_hist, mc_hist_up, mc_hist_down , data_hist, output_filename, flow_bins='none' , xlabel = 'myVariable', yscale='linear', IspostEE=False, IsBarrel=False): fig, ax = plt.subplots(2, 1, gridspec_kw={'height_ratios': [3, 1], 'hspace': 0.03}, sharex=True) stat_errors = np.sqrt(mc_hist.variances()) data_stat_error = np.sqrt(data_hist.variances()) # plotting nominal and data histograms hep.histplot( mc_hist, label = r'$Z \rightarrow ee$ - simulation', yerr = stat_errors, density = True, linewidth=3, ax=ax[0], flow=flow_bins ) hep.histplot( data_hist, label = "Data", yerr= data_stat_error, density = True, color="black", linewidth=3, histtype='errorbar', markersize=12, elinewidth=3, alpha=1, ax=ax[0], flow=flow_bins ) ax[0].set_xlabel('') ax[0].margins(y=0.15) ax[0].set_ylim(0, 1.1*ax[0].get_ylim()[1]) ax[0].tick_params(labelsize=22) # Ploting the systematic bands around the nominal sigma_m curve data_hist_numpy = data_hist.to_numpy() mc_hist_numpy = mc_hist.to_numpy() # Calculating the integrals to normalize the systematic yields, since it is a density that is being plotted! integral_mc_down = mc_hist_down.sum().value * (mc_hist_down.to_numpy()[1][1] - mc_hist_down.to_numpy()[1][0]) integral_mc_up = mc_hist_up.sum().value * (mc_hist_up.to_numpy()[1][1] - mc_hist_up.to_numpy()[1][0]) # Lower and upper bounds of the systematic band - normalized to one lower_bound = mc_hist_down.to_numpy()[0]/integral_mc_down upper_bound = mc_hist_up.to_numpy()[0]/integral_mc_up # Using the matplotlib .fill_between() to plot the bands around the nominal variation ax[0].fill_between(data_hist_numpy[1][:-1], lower_bound, upper_bound, step='post', hatch='XXX', alpha=0.9, facecolor="none", edgecolor="tab:purple", linewidth=0) # line at 1 in the ratio plot ax[1].axhline(1, 0, 1, label=None, linestyle='--', color="black", linewidth=1) # Integrals of data and nominal mc for ratio calculation integral_data = data_hist.sum().value * (data_hist_numpy[1][1] - data_hist_numpy[1][0]) integral_mc = mc_hist.sum().value * (mc_hist_numpy[1][1] - mc_hist_numpy[1][0]) # to numpy histos mc_hist_numpy = mc_hist.to_numpy() mc_hist_up_numpy = mc_hist_up.to_numpy() mc_hist_down_numpy = mc_hist_down.to_numpy() integral_mc_up = mc_hist_up.sum().value * (mc_hist_up_numpy[1][1] - mc_hist_up_numpy[1][0]) error_up = np.abs(mc_hist_up_numpy[0]/integral_mc_up - mc_hist_numpy[0]/integral_mc)/(mc_hist_numpy[0]/integral_mc) integral_mc_down = mc_hist_down.sum().value * (mc_hist_down_numpy[1][1] - mc_hist_down_numpy[1][0]) error_down = np.abs(mc_hist_down_numpy[0]/integral_mc_down - mc_hist_numpy[0]/integral_mc)/(mc_hist_numpy[0]/integral_mc) # Plotting the bands around 1 lower_bound = 1 - error_down upper_bound = 1 + error_up # Plot the hatched region ax[1].fill_between(data_hist_numpy[1][:-1], lower_bound, upper_bound, step='post', hatch='XXX', alpha=0.9, facecolor="none", edgecolor="tab:purple", linewidth=0 ) # Calculating the ratio betwenn nominal mc and data and the stat error of mc + data values1, variances1 = data_hist.values(), data_hist.variances() values2, variances2 = mc_hist.values() , mc_hist.variances() # Calculate the ratio ratio = (values1/np.sum(values1)) / (values2/np.sum(values2)) # Error from both data stat and MC stat error_ratio = np.sqrt((np.sqrt(variances2) / values2)**2 + (np.sqrt(variances1) / values1))**2 error_ratio[error_ratio <= 0] = 0 hep.histplot( ratio[:-1], bins=data_hist_numpy[1][:-1], label=None, color="black", histtype='errorbar', yerr= error_ratio[:-1], markersize=12, elinewidth=3, alpha=1, ax=ax[1] ) # Setting the labels ax[0].set_ylabel("Normalised density", fontsize=26) ax[1].set_ylabel("Data / MC", fontsize=26) ax[1].set_xlabel(xlabel, fontsize=26, usetex=True) # Some axes configurations ax[0].tick_params(labelsize=24) ax[0].set_yscale(yscale) ax[1].set_ylim(0., 1.1*ax[0].get_ylim()[1]) ax[1].set_ylim(0.6, 1.4) ax[0].legend(loc="upper right", fontsize=20) # Text indicating the region #if(IsBarrel): # ax[0].text(0.05, 0.95, r'EB-EB', transform=ax[0].transAxes, fontsize=20, verticalalignment='top') #else: # ax[0].text(0.05, 0.95, r'Not EB-EB', transform=ax[0].transAxes, fontsize=20, verticalalignment='top') # Setting the lumi labels if(IspostEE): hep.cms.label(data=True, ax=ax[0], loc=0, label="Private Work", com=13.6, lumi=27.0) else: hep.cms.label(data=True, ax=ax[0], loc=0, label="Private Work", com=13.6, lumi=8.1) plt.subplots_adjust(hspace=0.03) plt.tight_layout() fig.savefig(output_filename, bbox_inches='tight')
[docs]def create_mask(arr): data_mass = np.array(arr["mass"]) #mask data interval mask_mass = np.logical_and(data_mass > 80, data_mass < 100) mask_mass = np.logical_and(mask_mass, np.abs(np.array(arr["tag_eta"])) < 2.5 ) mask_mass = np.logical_and(mask_mass, np.abs(np.array(arr["tag_pt"])) > 42 ) mask_mass = np.logical_and(mask_mass, np.abs(np.array(arr["probe_pt"])) > 22 ) mask_mass = np.logical_and(mask_mass, np.abs(np.array(arr["probe_eta"])) < 2.5 ) #lets make the electron veto cut! mask_mass = np.logical_and(mask_mass, np.abs(np.array(arr["tag_electronVeto"]) ) == False ) mask_mass = np.logical_and(mask_mass, np.abs(np.array(arr["probe_electronVeto"]) ) == False ) # also, the cutBased ID! mask_mass = np.logical_and(mask_mass, np.array(arr["tag_cutBased"]) > 0 ) mask_mass = np.logical_and(mask_mass, np.array(arr["probe_cutBased"]) > 0 ) return mask_mass
[docs]def get_sigmaE_over_E(ak_arr, energyErr_field): return np.array(ak_arr[energyErr_field]/(ak_arr.probe_pt * np.cosh(ak_arr.probe_eta)))
[docs]def main(): # Read the plot_settings json to check what we want to plot with open('./plot_settings.json', 'r') as f: plot_settings = json.load(f) # We need to read the nominal up and down systs energy_Err_systs = ["nominal","energyErrShift_up","energyErrShift_down"] store_sigma_over_m_syst = [] store_sigma_over_m_corr_smeared_syst = [] store_sigma_over_m_corr_syst = [] store_sigma_over_m_smeared_syst = [] _weights = [] # for MC store_mc_ebeb_masks = [] store_mc_not_ebeb_masks = [] for target in plot_settings.keys(): print('INFO: Making the plot for', target, 'now...') target_settings = plot_settings[target] # Just a shortcut to avoid diving into deeply nested dictionaries ## Data ak_arr = ak.from_parquet(glob.glob(target_settings['files_data']))#, columns=target_settings['variable']) ak_arr = ak_arr[create_mask(ak_arr)] # Apply event selection if target == 'probe_energyErr_DIVbyE': data_arr = get_sigmaE_over_E(ak_arr, 'probe_energyErr') elif target == 'probe_corr_energyErr_DIVbyE': data_arr = get_sigmaE_over_E(ak_arr, 'probe_energyErr') # In Data, we do not have corrected variables elif target_settings['variable'] == 'sigma_m_over_m_corr': data_arr = ak_arr['sigma_m_over_m'] elif target_settings['variable'] == 'sigma_m_over_m_Smeared_corrected': data_arr = ak_arr['sigma_m_over_m_Smeared'] elif target_settings['variable'] == 'probe_corr_energyErr': data_arr = ak_arr['probe_energyErr'] else: data_arr = ak_arr[target_settings['variable']] data_weights = np.ones(len(data_arr)) MC_arr_list = [] # Needs to be re-initialised in each pass through the loop ## MC for variation in energy_Err_systs: print('Reading variation:', variation) ak_arr = ak.from_parquet(glob.glob(target_settings['files_MC'] + str(variation) + "/*.parquet")) ak_arr = ak_arr[create_mask(ak_arr)] if target == 'probe_energyErr_DIVbyE': MC_arr_list.append(get_sigmaE_over_E(ak_arr, 'probe_energyErr')) elif target == 'probe_corr_energyErr_DIVbyE': MC_arr_list.append(get_sigmaE_over_E(ak_arr, 'probe_corr_energyErr')) else: MC_arr_list.append(np.array(ak_arr[target_settings['variable']])) # Maybe a dict would be better suited... _weights.append(np.array(ak_arr["weight"])) binning = target_settings['binning'] data_hist = hist.new.Reg(binning[0], binning[1], binning[2]).Weight() mc_hist = hist.new.Reg(binning[0], binning[1], binning[2]).Weight() mc_hist_up = hist.new.Reg(binning[0], binning[1], binning[2]).Weight() mc_hist_down = hist.new.Reg(binning[0], binning[1], binning[2]).Weight() if target_settings['density'] == True: data_hist.fill(data_arr, weight = data_weights/np.sum(data_weights)) mc_hist.fill(MC_arr_list[0], weight = _weights[0]/np.sum(_weights[0])) mc_hist_up.fill(MC_arr_list[1], weight = _weights[1]/np.sum(_weights[1])) mc_hist_down.fill(MC_arr_list[2], weight = _weights[2]/np.sum(data_weights[2])) else: data_hist.fill(data_arr, weight = data_weights/np.sum(data_weights)) mc_hist.fill(MC_arr_list[0], weight = _weights[0]) mc_hist_up.fill(MC_arr_list[1], weight = _weights[1]) mc_hist_down.fill(MC_arr_list[2], weight = _weights[2]) plotter(mc_hist, mc_hist_up, mc_hist_down, data_hist, 'plots_smear/test/'+target+'.png', flow_bins = target_settings['flow_bins'], xlabel = target_settings['xlabel'], yscale=target_settings['yscale'])
if __name__ == "__main__": main()