Coverage for orcasong/parser.py: 0%
43 statements
« prev ^ index » next coverage.py v7.2.7, created at 2024-10-03 18:23 +0000
« prev ^ index » next coverage.py v7.2.7, created at 2024-10-03 18:23 +0000
1"""
2Run OrcaSong functionalities from command line.
4"""
5import argparse
6from orcasong import version
7from orcasong.tools.concatenate import concatenate
8from orcasong.tools.postproc import postproc_file
9from orcasong.tools.shuffle2 import h5shuffle2
10import orcasong.from_toml as from_toml
11import orcasong.plotting.plot_binstats as plot_binstats
12import orcasong.tools.make_data_split as make_data_split
def _add_parser_concatenate(subparsers):
    """Register the ``concatenate`` subcommand on ``subparsers``.

    Parameters
    ----------
    subparsers
        The object returned by ``ArgumentParser.add_subparsers()``; the new
        subcommand parser is added to it.

    Notes
    -----
    ``concatenate`` is stored as the ``func`` default of the subcommand.
    ``main`` pops ``func`` and calls it with the remaining parsed options
    (``file``, ``outfile``, ``no_used_files``, ``skip_errors``) as keyword
    arguments, so the option names here must match its parameter names.
    """
    parser = subparsers.add_parser(
        "concatenate",
        description="Concatenate many small h5 files to a single large one "
        "in a km3pipe compatible format. This is intended for "
        "files that get generated by orcasong, i.e. all datasets "
        "should have the same length, with one row per "
        "blob. "
        "Compression options and the datasets to be created in "
        "the new file will be read from the first input file.",
    )
    parser.add_argument(
        "file",
        type=str,
        # "*" also accepts zero files; the single-vs-many distinction
        # described in the help is resolved by the called function.
        nargs="*",
        help="Define the files to concatenate. If it's one argument: A txt list "
        "with paths of h5 files to concatenate (one path per line). "
        "If it's multiple arguments: "
        "The paths of h5 files to concatenate.",
    )
    parser.add_argument(
        "--outfile",
        type=str,
        default="concatenated.h5",
        help="The absolute filepath of the output .h5 file that will be created. ",
    )
    parser.add_argument(
        "--no_used_files",
        action="store_true",
        help="Per default, the paths of the input files are added "
        "as their own datagroup in the output file. Use this flag to "
        "disable. ",
    )
    parser.add_argument(
        "--skip_errors",
        action="store_true",
        help="If true, ignore files that can't be concatenated. ",
    )
    parser.set_defaults(func=concatenate)
def _add_parser_h5shuffle(subparsers):
    """Attach the ``h5shuffle`` subcommand to ``subparsers``.

    The subcommand accepts a positional ``input_file`` plus the optional
    ``--output_file`` and ``--delete`` options, and selects ``postproc_file``
    as the function that ``main`` runs with the parsed options.
    """
    shuffle_parser = subparsers.add_parser(
        "h5shuffle",
        description="Shuffle an h5 file using km3pipe.",
    )

    # (flags, keyword options) for every argument, in registration order.
    argument_specs = (
        (("input_file",), {"type": str, "help": "File to shuffle."}),
        (
            ("--output_file",),
            {
                "type": str,
                "help": "Name of output file. Default: Auto generate name.",
            },
        ),
        (
            ("--delete",),
            {
                "action": "store_true",
                "help": "Delete original file afterwards.",
            },
        ),
    )
    for flags, options in argument_specs:
        shuffle_parser.add_argument(*flags, **options)

    shuffle_parser.set_defaults(func=postproc_file)
def _add_parser_h5shuffle2(subparsers):
    """Register the ``h5shuffle2`` subcommand on ``subparsers``.

    Parameters
    ----------
    subparsers
        The object returned by ``ArgumentParser.add_subparsers()``; the new
        subcommand parser is added to it.

    Notes
    -----
    ``h5shuffle2`` is stored as the ``func`` default of the subcommand.
    ``main`` calls it with the parsed options (``input_file``,
    ``output_file``, ``datasets``, ``max_ram_fraction``, ``iterations``,
    ``max_ram``) as keyword arguments.
    """
    parser = subparsers.add_parser(
        "h5shuffle2",
        # Adjacent literals are concatenated verbatim, so each fragment needs
        # its own trailing space to keep words apart in the rendered help.
        description="Shuffle datasets in a h5file that have the same length. "
        "Uses chunkwise readout for speed-up. If you run into memory errors, try "
        "manually setting --max_ram to a smaller value.",
    )
    parser.add_argument(
        "input_file", type=str, help="Path of the file that will be shuffled."
    )
    parser.add_argument(
        "--output_file",
        type=str,
        default=None,
        help="If given, this will be the name of the output file. "
        "Default: input_file + suffix.",
    )
    parser.add_argument(
        "--datasets",
        type=str,
        nargs="*",
        default=("x", "y"),
        help="Which datasets to include in output. Default: x, y",
    )
    parser.add_argument(
        "--max_ram_fraction",
        type=float,
        default=0.25,
        help="in [0, 1]. Only used when max_ram is not given. Fraction of all "
        "available ram to use for reading one batch of data. "
        "Note: this should "
        "be <=~0.25 or so, since lots of ram is needed for in-memory shuffling. "
        "Default: 0.25",
    )
    parser.add_argument(
        "--iterations",
        type=int,
        default=None,
        help="Shuffle the file this many times. Default: Auto choose best number.",
    )
    parser.add_argument(
        "--max_ram",
        type=int,
        default=None,
        help="Available ram in bytes. Default: Use fraction of maximum "
        "available instead (see max_ram_fraction).",
    )
    parser.set_defaults(func=h5shuffle2)
def main():
    """Entry point of the ``orcasong`` command line interface.

    Builds the top-level parser, registers the subcommands defined in this
    module, and hands the subparsers object to ``from_toml``,
    ``plot_binstats`` and ``make_data_split`` so they can add their own.
    It then parses the command line and calls the function stored under
    ``func`` by the chosen subcommand, passing all remaining parsed
    arguments as keyword arguments.

    If the program is invoked without a subcommand, a usage error is
    reported through argparse (exit status 2) instead of failing with a
    ``KeyError`` on the missing ``func`` entry.
    """
    parser = argparse.ArgumentParser(
        prog="orcasong",
        description=__doc__,
        # Keep the line breaks of the module docstring in the help output.
        formatter_class=argparse.RawTextHelpFormatter,
    )
    parser.add_argument("--version", action="version", version=version)

    subparsers = parser.add_subparsers()
    from_toml.add_parser_run(subparsers)
    _add_parser_concatenate(subparsers)
    _add_parser_h5shuffle(subparsers)
    _add_parser_h5shuffle2(subparsers)
    plot_binstats.add_parser(subparsers)
    make_data_split.add_parser(subparsers)

    kwargs = vars(parser.parse_args())
    # Subcommands are optional for argparse here, so "func" is only present
    # when one was actually selected on the command line.
    func = kwargs.pop("func", None)
    if func is None:
        parser.error("no subcommand given (use -h to list the available ones)")
    func(**kwargs)