Coverage for orcasong/plotting/plot_binstats.py: 25%

105 statements  

« prev     ^ index     » next       coverage.py v7.2.7, created at 2024-10-03 18:23 +0000

1""" 

2Run with a parser to plot the binning statistics. 

3Functions for plotting the bin stats made by the BinningStatsMaker module. 

4""" 

5 

6import os 

7import warnings 

8import argparse 

9import matplotlib 

10 

11matplotlib.use("pdf") 

12import matplotlib.pyplot as plt 

13from matplotlib.backends.backend_pdf import PdfPages 

14import h5py 

15import numpy as np 

16 

17 

18__author__ = "Stefan Reck" 

19 

20 

21def plot_hists(hists_list, save_to, plot_bin_edges=True): 

22 """ 

23 Plot histograms made by the BinningStatsMaker to the given pdf path. 

24 

25 Parameters 

26 ---------- 

27 hists_list : List 

28 Can also be a list of dicts (for multiple files), which will be 

29 combined to one plot. 

30 Each dict has info about the binning, generated by the BinningStatsMaker. 

31 save_to : str 

32 Where to save the plot to. 

33 plot_bin_edges : bool 

34 If true, will plot the bin edges as horizontal lines. Is never used 

35 for the time binning (field name "time"). 

36 

37 """ 

38 hists = combine_hists(hists_list) 

39 

40 if os.path.exists(save_to): 

41 raise FileExistsError(f"File {save_to} exists already!") 

42 

43 with PdfPages(save_to) as pdf_file: 

44 for bin_name, hists_data in hists.items(): 

45 hist_bin_edges = hists_data["hist_bin_edges"] 

46 bin_edges = hists_data["bin_edges"] 

47 hist = hists_data["hist"] 

48 cut_off = hists_data["cut_off"] 

49 total_hits = np.sum(hist) + np.sum(cut_off) 

50 

51 bin_widths = np.diff(hist_bin_edges) 

52 hist_prob = hist / bin_widths / np.sum(hist) 

53 

54 fig, ax = plt.subplots() 

55 plt.bar(hist_bin_edges[:-1], hist_prob, align="edge", width=bin_widths) 

56 

57 if plot_bin_edges and bin_name != "time": 

58 for bin_edge in bin_edges: 

59 plt.axvline( 

60 x=bin_edge, color="grey", linestyle="-", linewidth=1, alpha=0.9 

61 ) 

62 

63 # place a text box in upper left in axes coords 

64 out_neg_rel = cut_off[0] / total_hits 

65 out_pos_rel = cut_off[1] / total_hits 

66 textstr = "Hits cut off:\nLeft: {:.1%}\nRight: {:.1%}".format( 

67 out_neg_rel, out_pos_rel 

68 ) 

69 props = dict(boxstyle="round", facecolor="white", alpha=0.9) 

70 ax.text( 

71 0.95, 

72 0.95, 

73 textstr, 

74 transform=ax.transAxes, 

75 verticalalignment="top", 

76 bbox=props, 

77 horizontalalignment="right", 

78 ) 

79 

80 # the auto ticks are nice even numbers 

81 ylims = [0, ax.get_yticks().max()] 

82 ax.set_ylim(ylims) 

83 

84 plt.xlabel(bin_name) 

85 plt.ylabel("Density of hits") 

86 

87 pdf_file.savefig(fig) 

88 

89 print("Saved binning plot to " + save_to) 

90 

91 

92def combine_hists(hists_list): 

93 """ 

94 Combine the hists for multiple files into a single big one. 

95 

96 Parameters 

97 ---------- 

98 hists_list : List 

99 A list of dicts (hists from the BinningStatsMaker for different files). 

100 

101 Returns 

102 ------- 

103 combined_hists : Dict 

104 The combined hist. 

105 

106 """ 

107 if len(hists_list) == 1: 

108 # just one entry, no need to combine 

109 return hists_list[0] 

110 

111 # initialize combined dict according to first dict in list 

112 combined_hists = {} 

113 for bin_name, hists_data in hists_list[0].items(): 

114 hist_bin_edges = hists_data["hist_bin_edges"] 

115 bin_edges = hists_data["bin_edges"] 

116 bin_name = bin_name 

117 

118 combined_hists[bin_name] = { 

119 "hist": np.zeros(len(hist_bin_edges) - 1), 

120 "hist_bin_edges": hist_bin_edges, 

121 "bin_edges": bin_edges, 

122 # below smallest edge, above largest edge: 

123 "cut_off": np.zeros(2), 

124 } 

125 

126 # add them together 

127 for hists in hists_list: 

128 for bin_name, hists_data in hists.items(): 

129 # name and bin edges must be the same for all hists 

130 if bin_name not in combined_hists: 

131 raise NameError( 

132 "Hists dont all have the same binning field name:" 

133 "{}".format(bin_name) 

134 ) 

135 

136 bin_edges = hists_data["bin_edges"] 

137 comb_edges = combined_hists[bin_name]["bin_edges"] 

138 if len(bin_edges) != len(comb_edges): 

139 raise ValueError( 

140 "Hists have different hist bin edges: {} vs {}".format( 

141 bin_edges, comb_edges 

142 ) 

143 ) 

144 

145 hist_bin_edges = hists_data["hist_bin_edges"] 

146 comb_hist_edges = combined_hists[bin_name]["hist_bin_edges"] 

147 if len(hist_bin_edges) != len(comb_hist_edges): 

148 raise ValueError( 

149 "Hists have different bin edges: {} vs {}".format( 

150 hist_bin_edges, comb_hist_edges 

151 ) 

152 ) 

153 

154 combined_hists[bin_name]["hist"] += hists_data["hist"] 

155 combined_hists[bin_name]["cut_off"] += hists_data["cut_off"] 

156 

157 return combined_hists 

158 

159 

160def add_hists_to_h5file(hists, f): 

161 """ 

162 Add the binning stats as groups to a h5 file. 

163 

164 Parameters 

165 ---------- 

166 hists : dict 

167 The histst from the BinningStatsMaker module. 

168 f 

169 The opened h5 file. 

170 

171 """ 

172 for bin_name, hists_data in hists.items(): 

173 for key, val in hists_data.items(): 

174 f.create_dataset(f"bin_stats/{bin_name}/{key}", data=val) 

175 

176 

177def read_hists_from_h5file(f): 

178 """Read the binning stats from the file, put it in dicts.""" 

179 data = {} 

180 for bin_name, hists_data in f["bin_stats/"].items(): 

181 data[bin_name] = {k: v[()] for k, v in hists_data.items()} 

182 return data 

183 

184 

185def plot_hist_of_files(save_as, files=None): 

186 """ 

187 Plot the binning stats of all h5 files in the current directory. 

188 

189 Parameters 

190 ---------- 

191 save_as : str 

192 Where to save the plot to. 

193 files : List, optional 

194 Path of files to use instead. 

195 

196 """ 

197 if not files: 

198 files = get_all_h5_files() 

199 

200 hists_list = [] 

201 print("Plotting stats of {} file(s)".format(len(files))) 

202 

203 for i, file in enumerate(files, start=1): 

204 if i % 100 == 0: 

205 print(f"Reading file {i} / {len(files)} ...") 

206 try: 

207 with h5py.File(file, "r") as f: 

208 hists_list.append(read_hists_from_h5file(f)) 

209 except KeyError: 

210 warnings.warn(f"ERROR: No bin_stats in {file}. Skipping ...") 

211 continue 

212 if len(hists_list) == 0: 

213 raise ValueError("No files with bin_stats found!") 

214 print("Plotting...") 

215 plot_hists(hists_list, save_as) 

216 

217 

218def get_all_h5_files(): 

219 """Get a list of all h5 files in the cwd.""" 

220 files = [] 

221 for file in os.listdir(os.getcwd()): 

222 if file.endswith(".h5"): 

223 files.append(file) 

224 return files 

225 

226 

227def main(): 

228 # TODO deprecated 

229 raise NotImplementedError( 

230 "plot_binstats has been renamed to orcasong plot_binstats" 

231 ) 

232 

233 

234def add_parser(subparsers): 

235 parser = subparsers.add_parser( 

236 "plot_binstats", 

237 description="Generate a plot with statistics of the binning. " 

238 "Can only be used on files generated with the FileBinner when " 

239 "add_bin_stats was set to true (default). ", 

240 ) 

241 parser.add_argument( 

242 "--save_as", 

243 type=str, 

244 default="bin_stats_plot.pdf", 

245 help="Filename of the plot. Default: bin_stats_plot.pdf.", 

246 ) 

247 parser.add_argument( 

248 "files", 

249 type=str, 

250 nargs="*", 

251 default=None, 

252 help="File(s) to plot. Default: Plot for all h5 files in current dir.", 

253 ) 

254 parser.set_defaults(func=plot_hist_of_files) 

255 

256 

257if __name__ == "__main__": 

258 main()