Coverage for orcasong/plotting/plot_binstats.py: 25%
105 statements
« prev ^ index » next coverage.py v7.2.7, created at 2024-10-03 18:23 +0000
« prev ^ index » next coverage.py v7.2.7, created at 2024-10-03 18:23 +0000
1"""
2Run with a parser to plot the binning statistics.
3Functions for plotting the bin stats made by the BinningStatsMaker module.
4"""
6import os
7import warnings
8import argparse
9import matplotlib
11matplotlib.use("pdf")
12import matplotlib.pyplot as plt
13from matplotlib.backends.backend_pdf import PdfPages
14import h5py
15import numpy as np
18__author__ = "Stefan Reck"
21def plot_hists(hists_list, save_to, plot_bin_edges=True):
22 """
23 Plot histograms made by the BinningStatsMaker to the given pdf path.
25 Parameters
26 ----------
27 hists_list : List
28 Can also be a list of dicts (for multiple files), which will be
29 combined to one plot.
30 Each dict has info about the binning, generated by the BinningStatsMaker.
31 save_to : str
32 Where to save the plot to.
33 plot_bin_edges : bool
34 If true, will plot the bin edges as horizontal lines. Is never used
35 for the time binning (field name "time").
37 """
38 hists = combine_hists(hists_list)
40 if os.path.exists(save_to):
41 raise FileExistsError(f"File {save_to} exists already!")
43 with PdfPages(save_to) as pdf_file:
44 for bin_name, hists_data in hists.items():
45 hist_bin_edges = hists_data["hist_bin_edges"]
46 bin_edges = hists_data["bin_edges"]
47 hist = hists_data["hist"]
48 cut_off = hists_data["cut_off"]
49 total_hits = np.sum(hist) + np.sum(cut_off)
51 bin_widths = np.diff(hist_bin_edges)
52 hist_prob = hist / bin_widths / np.sum(hist)
54 fig, ax = plt.subplots()
55 plt.bar(hist_bin_edges[:-1], hist_prob, align="edge", width=bin_widths)
57 if plot_bin_edges and bin_name != "time":
58 for bin_edge in bin_edges:
59 plt.axvline(
60 x=bin_edge, color="grey", linestyle="-", linewidth=1, alpha=0.9
61 )
63 # place a text box in upper left in axes coords
64 out_neg_rel = cut_off[0] / total_hits
65 out_pos_rel = cut_off[1] / total_hits
66 textstr = "Hits cut off:\nLeft: {:.1%}\nRight: {:.1%}".format(
67 out_neg_rel, out_pos_rel
68 )
69 props = dict(boxstyle="round", facecolor="white", alpha=0.9)
70 ax.text(
71 0.95,
72 0.95,
73 textstr,
74 transform=ax.transAxes,
75 verticalalignment="top",
76 bbox=props,
77 horizontalalignment="right",
78 )
80 # the auto ticks are nice even numbers
81 ylims = [0, ax.get_yticks().max()]
82 ax.set_ylim(ylims)
84 plt.xlabel(bin_name)
85 plt.ylabel("Density of hits")
87 pdf_file.savefig(fig)
89 print("Saved binning plot to " + save_to)
92def combine_hists(hists_list):
93 """
94 Combine the hists for multiple files into a single big one.
96 Parameters
97 ----------
98 hists_list : List
99 A list of dicts (hists from the BinningStatsMaker for different files).
101 Returns
102 -------
103 combined_hists : Dict
104 The combined hist.
106 """
107 if len(hists_list) == 1:
108 # just one entry, no need to combine
109 return hists_list[0]
111 # initialize combined dict according to first dict in list
112 combined_hists = {}
113 for bin_name, hists_data in hists_list[0].items():
114 hist_bin_edges = hists_data["hist_bin_edges"]
115 bin_edges = hists_data["bin_edges"]
116 bin_name = bin_name
118 combined_hists[bin_name] = {
119 "hist": np.zeros(len(hist_bin_edges) - 1),
120 "hist_bin_edges": hist_bin_edges,
121 "bin_edges": bin_edges,
122 # below smallest edge, above largest edge:
123 "cut_off": np.zeros(2),
124 }
126 # add them together
127 for hists in hists_list:
128 for bin_name, hists_data in hists.items():
129 # name and bin edges must be the same for all hists
130 if bin_name not in combined_hists:
131 raise NameError(
132 "Hists dont all have the same binning field name:"
133 "{}".format(bin_name)
134 )
136 bin_edges = hists_data["bin_edges"]
137 comb_edges = combined_hists[bin_name]["bin_edges"]
138 if len(bin_edges) != len(comb_edges):
139 raise ValueError(
140 "Hists have different hist bin edges: {} vs {}".format(
141 bin_edges, comb_edges
142 )
143 )
145 hist_bin_edges = hists_data["hist_bin_edges"]
146 comb_hist_edges = combined_hists[bin_name]["hist_bin_edges"]
147 if len(hist_bin_edges) != len(comb_hist_edges):
148 raise ValueError(
149 "Hists have different bin edges: {} vs {}".format(
150 hist_bin_edges, comb_hist_edges
151 )
152 )
154 combined_hists[bin_name]["hist"] += hists_data["hist"]
155 combined_hists[bin_name]["cut_off"] += hists_data["cut_off"]
157 return combined_hists
160def add_hists_to_h5file(hists, f):
161 """
162 Add the binning stats as groups to a h5 file.
164 Parameters
165 ----------
166 hists : dict
167 The histst from the BinningStatsMaker module.
168 f
169 The opened h5 file.
171 """
172 for bin_name, hists_data in hists.items():
173 for key, val in hists_data.items():
174 f.create_dataset(f"bin_stats/{bin_name}/{key}", data=val)
177def read_hists_from_h5file(f):
178 """Read the binning stats from the file, put it in dicts."""
179 data = {}
180 for bin_name, hists_data in f["bin_stats/"].items():
181 data[bin_name] = {k: v[()] for k, v in hists_data.items()}
182 return data
185def plot_hist_of_files(save_as, files=None):
186 """
187 Plot the binning stats of all h5 files in the current directory.
189 Parameters
190 ----------
191 save_as : str
192 Where to save the plot to.
193 files : List, optional
194 Path of files to use instead.
196 """
197 if not files:
198 files = get_all_h5_files()
200 hists_list = []
201 print("Plotting stats of {} file(s)".format(len(files)))
203 for i, file in enumerate(files, start=1):
204 if i % 100 == 0:
205 print(f"Reading file {i} / {len(files)} ...")
206 try:
207 with h5py.File(file, "r") as f:
208 hists_list.append(read_hists_from_h5file(f))
209 except KeyError:
210 warnings.warn(f"ERROR: No bin_stats in {file}. Skipping ...")
211 continue
212 if len(hists_list) == 0:
213 raise ValueError("No files with bin_stats found!")
214 print("Plotting...")
215 plot_hists(hists_list, save_as)
218def get_all_h5_files():
219 """Get a list of all h5 files in the cwd."""
220 files = []
221 for file in os.listdir(os.getcwd()):
222 if file.endswith(".h5"):
223 files.append(file)
224 return files
227def main():
228 # TODO deprecated
229 raise NotImplementedError(
230 "plot_binstats has been renamed to orcasong plot_binstats"
231 )
234def add_parser(subparsers):
235 parser = subparsers.add_parser(
236 "plot_binstats",
237 description="Generate a plot with statistics of the binning. "
238 "Can only be used on files generated with the FileBinner when "
239 "add_bin_stats was set to true (default). ",
240 )
241 parser.add_argument(
242 "--save_as",
243 type=str,
244 default="bin_stats_plot.pdf",
245 help="Filename of the plot. Default: bin_stats_plot.pdf.",
246 )
247 parser.add_argument(
248 "files",
249 type=str,
250 nargs="*",
251 default=None,
252 help="File(s) to plot. Default: Plot for all h5 files in current dir.",
253 )
254 parser.set_defaults(func=plot_hist_of_files)
257if __name__ == "__main__":
258 main()