Coverage for orcasong/parser.py: 0%

43 statements  

« prev     ^ index     » next       coverage.py v7.2.7, created at 2024-10-03 18:23 +0000

1""" 

2Run OrcaSong functionalities from command line. 

3 

4""" 

5import argparse 

6from orcasong import version 

7from orcasong.tools.concatenate import concatenate 

8from orcasong.tools.postproc import postproc_file 

9from orcasong.tools.shuffle2 import h5shuffle2 

10import orcasong.from_toml as from_toml 

11import orcasong.plotting.plot_binstats as plot_binstats 

12import orcasong.tools.make_data_split as make_data_split 

13 

14 

def _add_parser_concatenate(subparsers):
    """Register the ``concatenate`` subcommand on *subparsers*.

    The subcommand takes one or more positional ``file`` arguments plus the
    options ``--outfile``, ``--no_used_files`` and ``--skip_errors``, and
    stores ``concatenate`` as the ``func`` default of the sub-parser.

    Parameters
    ----------
    subparsers
        The object returned by ``ArgumentParser.add_subparsers()``.

    """
    parser = subparsers.add_parser(
        "concatenate",
        description="Concatenate many small h5 files to a single large one "
        "in a km3pipe compatible format. This is intended for "
        "files that get generated by orcasong, i.e. all datasets "
        "should have the same length, with one row per "
        "blob. "
        "Compression options and the datasets to be created in "
        "the new file will be read from the first input file.",
    )
    parser.add_argument(
        "file",
        type=str,
        # At least one input is needed (options are read from the first
        # file), so let argparse reject an empty list with a usage error.
        nargs="+",
        help="Define the files to concatenate. If it's one argument: A txt list "
        "with paths of h5 files to concatenate (one path per line). "
        "If it's multiple arguments: "
        "The paths of h5 files to concatenate.",
    )
    parser.add_argument(
        "--outfile",
        type=str,
        default="concatenated.h5",
        help="The absolute filepath of the output .h5 file that will be created. ",
    )
    parser.add_argument(
        "--no_used_files",
        action="store_true",
        help="Per default, the paths of the input files are added "
        "as their own datagroup in the output file. Use this flag to "
        "disable. ",
    )
    parser.add_argument(
        "--skip_errors",
        action="store_true",
        help="If true, ignore files that can't be concatenated. ",
    )
    parser.set_defaults(func=concatenate)

54 

55 

def _add_parser_h5shuffle(subparsers):
    """Register the ``h5shuffle`` subcommand on *subparsers*.

    Adds the positional ``input_file`` and the options ``--output_file``
    and ``--delete``, and stores ``postproc_file`` as the ``func`` default
    of the sub-parser.

    Parameters
    ----------
    subparsers
        The object returned by ``ArgumentParser.add_subparsers()``.

    """
    shuffle_cmd = subparsers.add_parser(
        "h5shuffle", description="Shuffle an h5 file using km3pipe."
    )

    # (name, keyword options) pairs, registered in the order they are listed
    # so the generated help keeps the same argument order.
    argument_specs = (
        ("input_file", {"type": str, "help": "File to shuffle."}),
        (
            "--output_file",
            {
                "type": str,
                "help": "Name of output file. Default: Auto generate name.",
            },
        ),
        (
            "--delete",
            {"action": "store_true", "help": "Delete original file afterwards."},
        ),
    )
    for arg_name, arg_options in argument_specs:
        shuffle_cmd.add_argument(arg_name, **arg_options)

    shuffle_cmd.set_defaults(func=postproc_file)

71 

72 

def _add_parser_h5shuffle2(subparsers):
    """Register the ``h5shuffle2`` subcommand on *subparsers*.

    Adds the positional ``input_file`` and the options ``--output_file``,
    ``--datasets``, ``--max_ram_fraction``, ``--iterations`` and
    ``--max_ram``, and stores ``h5shuffle2`` as the ``func`` default of the
    sub-parser.

    Parameters
    ----------
    subparsers
        The object returned by ``ArgumentParser.add_subparsers()``.

    """
    parser = subparsers.add_parser(
        "h5shuffle2",
        # Adjacent string literals are joined verbatim, so each fragment
        # must carry its own trailing space.
        description="Shuffle datasets in a h5file that have the same length. "
        "Uses chunkwise readout for speed-up. If you run into memory errors, try "
        "manually setting --max_ram to a smaller value.",
    )
    parser.add_argument(
        "input_file", type=str, help="Path of the file that will be shuffled."
    )
    parser.add_argument(
        "--output_file",
        type=str,
        default=None,
        help="If given, this will be the name of the output file. "
        "Default: input_file + suffix.",
    )
    parser.add_argument(
        "--datasets",
        type=str,
        nargs="*",
        default=("x", "y"),
        help="Which datasets to include in output. Default: x, y",
    )
    parser.add_argument(
        "--max_ram_fraction",
        type=float,
        default=0.25,
        help="in [0, 1]. Only used when max_ram is not given. Fraction of all "
        "available ram to use for reading one batch of data. "
        "Note: this should "
        "be <=~0.25 or so, since lots of ram is needed for in-memory shuffling. "
        "Default: 0.25",
    )
    parser.add_argument(
        "--iterations",
        type=int,
        default=None,
        help="Shuffle the file this many times. Default: Auto choose best number.",
    )
    parser.add_argument(
        "--max_ram",
        type=int,
        default=None,
        help="Available ram in bytes. Default: Use fraction of maximum "
        "available instead (see max_ram_fraction).",
    )
    parser.set_defaults(func=h5shuffle2)

121 

122 

def main():
    """Run the ``orcasong`` command line interface.

    Builds the top-level argument parser, registers all subcommands, parses
    the command line, and calls the function that the selected subcommand
    stored as its ``func`` default. All remaining parsed arguments are
    passed to that function as keyword arguments.

    If no subcommand is given, the help text is printed and the program
    exits with status 1 instead of failing with a ``KeyError``.
    """
    parser = argparse.ArgumentParser(
        prog="orcasong",
        description=__doc__,
        formatter_class=argparse.RawTextHelpFormatter,
    )
    parser.add_argument('--version', action='version', version=version)

    subparsers = parser.add_subparsers()
    from_toml.add_parser_run(subparsers)
    _add_parser_concatenate(subparsers)
    _add_parser_h5shuffle(subparsers)
    _add_parser_h5shuffle2(subparsers)
    plot_binstats.add_parser(subparsers)
    make_data_split.add_parser(subparsers)

    kwargs = vars(parser.parse_args())
    # Subcommands are optional by default in argparse, so "func" is absent
    # when the program is invoked without one.
    func = kwargs.pop("func", None)
    if func is None:
        parser.print_help()
        parser.exit(status=1)
    func(**kwargs)

141 func(**kwargs)