Coverage for orcasong/extractors/neutrino_chain.py: 55%

156 statements  

« prev     ^ index     » next       coverage.py v7.2.7, created at 2024-10-03 18:23 +0000

1""" 

2Functions that extract info from a blob for the mc_info / y datafield 

3in the h5 files. 

4 

5These are made for the specific given runs. They might not be 

6applicable to other data, and could cause errors or produce unexpected 

7results when used on data other then the specified. Check for example the  

8primary position in the mc_tracks. 

9 

10""" 

11 

12import warnings 

13import numpy as np 

14from km3pipe.io.hdf5 import HDF5Header 

15from h5py import File 

16import os, re 

17 

18__author__ = "Daniel Guderian" 

19 

20 

21def get_std_reco(blob, rec_types, rec_parameters_names): 

22 

23 """ 

24 Function to extract std reco info. This implementation requires h5 files 

25 to be processed with the option "--best_tracks" which adds the selection 

26 of best tracks for each reco type to the output using the km3io tools. 

27 

28 Returns 

29 ------- 

30 std_reco_info : dict 

31 Dict with the std reco info of the best tracks. 

32 

33 """ 

34 # this dict will be filled up 

35 std_reco_info = {} 

36 

37 # all known reco types to iterate over 

38 reco_type_dict = { 

39 "BestJmuon": ("jmuon_", "best_jmuon"), 

40 "BestJshower": ("jshower_", "best_jshower"), 

41 "BestDusjshower": ("dusjshower_", "best_dusjshower"), 

42 "BestAashower": ("aashower_", "best_aashower"), 

43 } 

44 

45 for name_in_blob, (identifier, best_track_name) in reco_type_dict.items(): 

46 

47 # always write out something for the generally present rec types 

48 if best_track_name in rec_types: 

49 

50 # specific names are with the prefix from the rec type 

51 specific_reco_names = np.core.defchararray.add( 

52 identifier, rec_parameters_names 

53 ) 

54 

55 # extract actually present info 

56 if name_in_blob in blob: 

57 

58 # get the previously identified best track 

59 bt = blob[name_in_blob] 

60 

61 # get all its values 

62 values = bt.item() 

63 values_list = list(values) 

64 # reco_names = bt.dtype.names #in case the fitinf and stuff will be tailored to the reco types 

65 # at some point, get the names directly like this 

66 

67 # in case there is no reco for this event but the reco type was done in general 

68 else: 

69 

70 # fill all values with nan's 

71 values_array = np.empty(len(specific_reco_names)) 

72 values_array[:] = np.nan 

73 values_list = values_array.tolist() 

74 

75 # create a dict out of them 

76 keys_list = list(specific_reco_names) 

77 

78 zip_iterator = zip(keys_list, values_list) 

79 reco_dict = dict(zip_iterator) 

80 

81 # add this dict to the complete std reco collection 

82 std_reco_info.update(reco_dict) 

83 

84 return std_reco_info 

85 

86 

87def get_rec_types_in_file(file): 

88 

89 """ 

90 Checks and returns which rec types are in the file and thus need to be present 

91 in all best track and their fitinf information later. 

92 """ 

93 

94 # the known rec types 

95 rec_type_names = ["best_jmuon", "best_jshower", "best_dusjshower", "best_aashower"] 

96 

97 # all reco related objects in the file 

98 reco_objects_in_file = file["reco"].keys() 

99 

100 # check which ones are in there 

101 rec_types_in_file = [] 

102 for rec_type in rec_type_names: 

103 if rec_type in reco_objects_in_file: 

104 rec_types_in_file.append(rec_type) 

105 

106 # also get from here the list of dtype names that is share for all recos 

107 rec_parameters_names = file["reco"][rec_type].dtype.names 

108 

109 return rec_types_in_file, rec_parameters_names 

110 

111 

112def get_real_data_info_extr(input_file): 

113 

114 """ 

115 Wrapper function that includes the actual mc_info_extr 

116 for real data. There are no n_gen like in the neutrino case. 

117 

118 Parameters 

119 ---------- 

120 input_file : km3net data file 

121 Can be online or offline format. 

122 

123 Returns 

124 ------- 

125 mc_info_extr : function 

126 The actual mc_info_extr function that holds the extractions. 

127 

128 """ 

129 

130 # check if std reco is present 

131 f = File(input_file, "r") 

132 has_std_reco = "reco" in f.keys() 

133 

134 if has_std_reco: 

135 # also check, which rec types are present 

136 rec_types, rec_parameters_names = get_rec_types_in_file(f) 

137 

138 def mc_info_extr(blob): 

139 

140 """ 

141 Processes a blob and creates the y with mc_info and, if existing, std reco. 

142 

143 For this real data case it is only general event info, like the id. 

144 

145 Parameters 

146 ---------- 

147 blob : dict 

148 The blob from the pipeline. 

149 

150 Returns 

151 ------- 

152 track : dict 

153 Containing all the specified info the y should have. 

154 

155 """ 

156 

157 event_info = blob["EventInfo"][0] 

158 

159 # add n_hits info for the cut 

160 n_hits = len(blob["Hits"]) 

161 n_trig_hits = np.count_nonzero(blob["Hits"]["triggered"]) 

162 

163 track = { 

164 "event_id": event_info.event_id, 

165 "run_id": event_info.run_id, 

166 "trigger_mask": event_info.trigger_mask, 

167 "n_hits": n_hits, 

168 "n_trig_hits": n_trig_hits, 

169 } 

170 

171 # get all the std reco info 

172 if has_std_reco: 

173 

174 std_reco_info = get_std_reco(blob, rec_types, rec_parameters_names) 

175 

176 track.update(std_reco_info) 

177 

178 return track 

179 

180 return mc_info_extr 

181 

182 

183def get_random_noise_mc_info_extr(input_file): 

184 

185 """ 

186 Wrapper function that includes the actual mc_info_extr 

187 for random noise simulations. There are no n_gen like in the neutrino case. 

188 

189 Parameters 

190 ---------- 

191 input_file : km3net data file 

192 Can be online or offline format. 

193 

194 Returns 

195 ------- 

196 mc_info_extr : function 

197 The actual mc_info_extr function that holds the extractions. 

198 

199 """ 

200 

201 # check if std reco is present 

202 f = File(input_file, "r") 

203 has_std_reco = "reco" in f.keys() 

204 

205 if has_std_reco: 

206 # also check, which rec types are present 

207 rec_types, rec_parameters_names = get_rec_types_in_file(f) 

208 

209 # an identifier for what the part of the mc simulation this was 

210 # this way, events can later be unambiguously identified 

211 input_filename_string = os.path.basename(input_file) 

212 part_number = re.findall(r"\d+", input_filename_string)[ 

213 -2 

214 ] # second last because of .h5 

215 

216 # dummy value for concatenation 

217 tau_topology = 3 

218 

219 def mc_info_extr(blob): 

220 

221 """ 

222 Processes a blob and creates the y with mc_info and, if existing, std reco. 

223 

224 For this random noise case it is only general event info, like the id. 

225 

226 Parameters 

227 ---------- 

228 blob : dict 

229 The blob from the pipeline. 

230 

231 Returns 

232 ------- 

233 track : dict 

234 Containing all the specified info the y should have. 

235 

236 """ 

237 event_info = blob["EventInfo"] 

238 

239 # add also the nhits info 

240 n_hits = len(blob["Hits"]) 

241 n_trig_hits = np.count_nonzero(blob["Hits"]["triggered"]) 

242 

243 track = { 

244 "event_id": event_info.event_id[0], 

245 "run_id": event_info.run_id[0], 

246 "particle_type": 0, 

247 "part_number": part_number, 

248 "n_hits": n_hits, 

249 "n_trig_hits": n_trig_hits, 

250 "tau_topology": tau_topology, 

251 } 

252 

253 # get all the std reco info 

254 if has_std_reco: 

255 

256 std_reco_info = get_std_reco(blob, rec_types, rec_parameters_names) 

257 

258 track.update(std_reco_info) 

259 

260 return track 

261 

262 return mc_info_extr 

263 

264 

265def get_neutrino_mc_info_extr(input_file,prod_identifier=999): 

266 

267 """ 

268 Wrapper function that includes the actual mc_info_extr 

269 for neutrino simulations. The n_gen parameter, needed for neutrino weighting 

270 is extracted from the header of the file. 

271 

272 Parameters 

273 ---------- 

274 input_file : km3net data file 

275 Can be online or offline format. 

276 prod_identifier : int 

277 An internal, unofficial identifier to mark the neutrino production. This has to be 

278 defined in a dict before.  

279 

280 Returns 

281 ------- 

282 mc_info_extr : function 

283 The actual mc_info_extr function that holds the extractions. 

284 

285 """ 

286 

287 # check if std reco is present 

288 f = File(input_file, "r") 

289 has_std_reco = "reco" in f.keys() 

290 

291 if has_std_reco: 

292 # also check, which rec types are present 

293 rec_types, rec_parameters_names = get_rec_types_in_file(f) 

294 

295 # get the n_gen 

296 header = HDF5Header.from_hdf5(input_file) 

297 n_gen = header.genvol.numberOfEvents 

298 

299 # an identifier for what the part of the mc simulation this was 

300 # this way, events can later be unambiguously identified 

301 input_filename_string = os.path.basename(input_file) 

302 try: 

303 part_number = re.findall(r"\d+", input_filename_string)[ 

304 -2 

305 ] # second last because of .h5 - works only for officially named files 

306 except IndexError: 

307 part_number = 0 

308 

309 def mc_info_extr(blob): 

310 

311 """ 

312 Processes a blob and creates the y with mc_info and, if existing, std reco. 

313 

314 For this neutrino case it is the full mc info for the primary neutrino; there are the several "McTracks": 

315 check the simulation which index "p" the neutrino has. 

316 

317 Parameters 

318 ---------- 

319 blob : dict 

320 The blob from the pipeline. 

321 

322 Returns 

323 ------- 

324 track : dict 

325 Containing all the specified info the y should have. 

326 

327 """ 

328 

329 # get general info about the event 

330 event_info = blob["EventInfo"] 

331 event_id = event_info.event_id[0] 

332 run_id = event_info.run_id[0] 

333 

334 # weights for neutrino analysis 

335 weight_w1 = event_info.weight_w1[0] 

336 weight_w2 = event_info.weight_w2[0] 

337 weight_w3 = event_info.weight_w3[0] 

338 

339 is_cc = event_info.W2LIST_GSEAGEN_CC[0] 

340 bjorkeny = event_info.W2LIST_GSEAGEN_BY[0] 

341 

342 # first, look for the particular neutrino index of the production 

343 p = 0 # for ORCA4 (and probably subsequent productions) 

344 

345 primary_mc_track = blob["McTracks"][p] 

346 

347 # some track mc truth info 

348 particle_type = primary_mc_track.pdgid # sometimes type, sometimes pdgid 

349 energy = primary_mc_track.energy 

350 dir_x, dir_y, dir_z = ( 

351 primary_mc_track.dir_x, 

352 primary_mc_track.dir_y, 

353 primary_mc_track.dir_z, 

354 ) 

355 time_interaction = ( 

356 primary_mc_track.time 

357 ) # actually always 0 for primary neutrino, measured in MC time 

358 vertex_pos_x, vertex_pos_y, vertex_pos_z = ( 

359 primary_mc_track.pos_x, 

360 primary_mc_track.pos_y, 

361 primary_mc_track.pos_z, 

362 ) 

363 

364 # for (muon) NC interactions, the visible energy is different 

365 if np.abs(particle_type) == 14 and is_cc == 3: 

366 visible_energy = energy * bjorkeny 

367 else: 

368 visible_energy = energy 

369 

370 # for tau CC it is not clear what the second interaction is; 1 for shower, 2 for track, 3 for nothing 

371 tau_topology = 3 

372 if np.abs(particle_type) == 16: 

373 if 13 in np.abs(blob["McTracks"].pdgid): 

374 tau_topology = 2 

375 else: 

376 tau_topology = 1 

377 

378 # add also the nhits info 

379 n_hits = len(blob["Hits"]) 

380 n_trig_hits = np.count_nonzero(blob["Hits"]["triggered"]) 

381 

382 track = { 

383 "event_id": event_id, 

384 "particle_type": particle_type, 

385 "energy": energy, 

386 "visible_energy": visible_energy, 

387 "is_cc": is_cc, 

388 "bjorkeny": bjorkeny, 

389 "dir_x": dir_x, 

390 "dir_y": dir_y, 

391 "dir_z": dir_z, 

392 "time_interaction": time_interaction, 

393 "run_id": run_id, 

394 "vertex_pos_x": vertex_pos_x, 

395 "vertex_pos_y": vertex_pos_y, 

396 "vertex_pos_z": vertex_pos_z, 

397 "n_hits": n_hits, 

398 "n_trig_hits": n_trig_hits, 

399 "weight_w1": weight_w1, 

400 "weight_w2": weight_w2, 

401 "weight_w3": weight_w3, 

402 "n_gen": n_gen, 

403 "part_number": part_number, 

404 "tau_topology": tau_topology, 

405 "prod_identifier": prod_identifier, 

406 } 

407 

408 # get all the std reco info 

409 if has_std_reco: 

410 

411 std_reco_info = get_std_reco(blob, rec_types, rec_parameters_names) 

412 

413 track.update(std_reco_info) 

414 

415 return track 

416 

417 return mc_info_extr 

418 

419 

420# function used by Stefan to identify which muons leave how many mc hits in the (active) detector. 

421def get_mchits_per_muon(blob, inactive_du=None): 

422 

423 """ 

424 For each muon in McTracks, get the number of McHits. 

425 Parameters 

426 ---------- 

427 blob 

428 The blob. 

429 inactive_du : int, optional 

430 McHits in this DU will not be counted. 

431 

432 Returns 

433 ------- 

434 np.array 

435 n_mchits, len = number of muons 

436 

437 """ 

438 ids = blob["McTracks"]["id"] 

439 

440 # Origin of each mchit (as int) in the active line 

441 origin = blob["McHits"]["origin"] 

442 

443 if inactive_du: 

444 # only hits in active line 

445 origin = origin[blob["McHits"]["du"] != inactive_du] 

446 

447 # get how many mchits were produced per muon in the bundle 

448 origin_dict = dict(zip(*np.unique(origin, return_counts=True))) 

449 

450 return np.array([origin_dict.get(i, 0) for i in ids]) 

451 

452 

453def get_muon_mc_info_extr(input_file, inactive_du=None): 

454 

455 """ 

456 Wrapper function that includes the actual mc_info_extr 

457 for atm. muon simulations. There are no n_gen like in the neutrino case. 

458 

459 Parameters 

460 ---------- 

461 input_file : km3net data file 

462 Can be online or offline format. 

463 

464 Returns 

465 ------- 

466 mc_info_extr : function 

467 The actual mc_info_extr function that holds the extractions. 

468 

469 """ 

470 

471 # check if std reco is present 

472 f = File(input_file, "r") 

473 has_std_reco = "reco" in f.keys() 

474 

475 if has_std_reco: 

476 # also check, which rec types are present 

477 rec_types, rec_parameters_names = get_rec_types_in_file(f) 

478 

479 # no n_gen here, but needed for concatenation 

480 n_gen = 1 

481 

482 # an identifier for what the part of the mc simulation this was 

483 # this way, events can later be unambiguously identified 

484 input_filename_string = os.path.basename(input_file) 

485 part_number = re.findall(r"\d+", input_filename_string)[ 

486 -2 

487 ] # second last because of .h5 

488 

489 # dummy value for concatenation 

490 tau_topology = 3 

491 

492 def mc_info_extr(blob): 

493 

494 """ 

495 Processes a blob and creates the y with mc_info and, if existing, std reco. 

496 

497 For this atm. muon case it is the full mc info for the primary; there are the several "McTracks": 

498 check the simulation to understand what "p" you want. Muons come in bundles that have the same direction. 

499 For energy: sum of all muons in a bundle, 

500 for vertex: weighted (energy) mean of the individual vertices . 

501 

502 Parameters 

503 ---------- 

504 blob : dict 

505 The blob from the pipeline. 

506 

507 Returns 

508 ------- 

509 track : dict 

510 Containing all the specified info the y should have. 

511 

512 """ 

513 

514 event_id = blob["EventInfo"].event_id[0] 

515 run_id = blob["EventInfo"].run_id[0] 

516 

517 p = 0 # for ORCA4 (and probably subsequent productions) 

518 

519 primary_mc_track = blob["McTracks"][p] 

520 

521 particle_type = 13 # fixed for all muons 

522 is_cc = 0 # set to 0 

523 bjorkeny = 0 # set to zero 

524 

525 time_interaction = primary_mc_track.time # same for all muons in a bundle 

526 

527 # sum up the energy from all muons that have at least x mc hits 

528 n_hits_per_muon = get_mchits_per_muon( 

529 blob, inactive_du=inactive_du 

530 ) # DU1 in ORCA4 is in the detx but not powered 

531 

532 # dont consider muons with less than 15 mc hits 

533 suficient_hits_mask = n_hits_per_muon >= 15 

534 energy = np.sum(blob["McTracks"][suficient_hits_mask].energy) 

535 # instead, assign them a small energy for consistency 

536 if energy == 0: 

537 energy = 40 

538 

539 # also add a visible energy here, which is not further defined 

540 visible_energy = energy 

541 

542 # all muons in a bundle are parallel, so just take dir of first muon 

543 dir_x, dir_y, dir_z = ( 

544 primary_mc_track.dir_x, 

545 primary_mc_track.dir_y, 

546 primary_mc_track.dir_z, 

547 ) 

548 

549 # vertex is the weighted (energy) mean of the individual vertices 

550 vertex_pos_x = np.average( 

551 blob["McTracks"][p:].pos_x, weights=blob["McTracks"][p:].energy 

552 ) 

553 vertex_pos_y = np.average( 

554 blob["McTracks"][p:].pos_y, weights=blob["McTracks"][p:].energy 

555 ) 

556 vertex_pos_z = np.average( 

557 blob["McTracks"][p:].pos_z, weights=blob["McTracks"][p:].energy 

558 ) 

559 

560 # add also the nhits info 

561 n_hits = len(blob["Hits"]) 

562 n_trig_hits = np.count_nonzero(blob["Hits"]["triggered"]) 

563 

564 # this is only relevant for neutrinos, though dummy info is needed for the concatenation 

565 weight_w1 = 10 

566 weight_w2 = 10 

567 weight_w3 = 10 

568 

569 track = { 

570 "event_id": event_id, 

571 "particle_type": particle_type, 

572 "energy": energy, 

573 "visible_energy": visible_energy, 

574 "is_cc": is_cc, 

575 "bjorkeny": bjorkeny, 

576 "dir_x": dir_x, 

577 "dir_y": dir_y, 

578 "dir_z": dir_z, 

579 "time_interaction": time_interaction, 

580 "run_id": run_id, 

581 "vertex_pos_x": vertex_pos_x, 

582 "vertex_pos_y": vertex_pos_y, 

583 "vertex_pos_z": vertex_pos_z, 

584 "n_hits": n_hits, 

585 "n_trig_hits": n_trig_hits, 

586 "weight_w1": weight_w1, 

587 "weight_w2": weight_w2, 

588 "weight_w3": weight_w3, 

589 "n_gen": n_gen, 

590 "part_number": part_number, 

591 "tau_topology": tau_topology, 

592 } 

593 

594 # get all the std reco info 

595 if has_std_reco: 

596 

597 std_reco_info = get_std_reco(blob, rec_types, rec_parameters_names) 

598 

599 track.update(std_reco_info) 

600 

601 return track 

602 

603 return mc_info_extr