Source code for orcanet.logging

"""
Scripts for writing the logfiles.
"""

import numpy as np
import os
import tensorflow.keras as ks
from datetime import datetime
from shutil import move
import orcanet


class TrainfileLogger:
    def __init__(self, log_file, column_names):
        """
        For writing the training log file in a nice format.

        Parameters
        ----------
        log_file : opened file
            The logfile.
        column_names : List
            A list of column names for the file.

        """
        # Minimum width of the cells in characters.
        self.minimum_cell_width = 11
        # Precision to which floats are rounded if they appear in data.
        self.float_precision = 6

        self.log_file = log_file
        self.column_names = column_names
        self._widths = None

    def level_file(self):
        """
        Make file with only the head lines.

        Existing file will be overwritten.
        """
        headline, widths = self._gen_line_str(self.column_names)
        vline = ["-" * width for width in widths]
        vertical_line = self._gen_line_str(vline, widths, seperator="-+-")[0]
        self.log_file.write(headline + "\n")
        self.log_file.write(vertical_line + "\n")
        self._widths = widths

    def write_line(self, values):
        """
        Write a line with data to the file.

        Parameters
        ----------
        values : List
            The data, in the same order as the column names.

        """
        if self._widths is None:
            raise ValueError("Can not log: .level_file has to be called first")
        if len(values) != len(self.column_names):
            raise ValueError(
                "Can not log: Expected {} values, but got "
                "{}".format(len(self.column_names), len(values))
            )

        line = self._gen_line_str(values, self._widths)[0]
        self.log_file.write(line + "\n")

    def _gen_line_str(self, data, widths=None, seperator=" | "):
        line, widths = gen_line_str(
            data,
            widths=widths,
            seperator=seperator,
            float_precision=self.float_precision,
            minimum_cell_width=self.minimum_cell_width,
        )
        return line, widths
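

# Example usage of TrainfileLogger (a minimal sketch, not part of the
# original module; the file name is made up):
#
#     with open("log_epoch_1_file_1.txt", "w") as f:
#         logger = TrainfileLogger(f, ["Batch", "loss"])
#         logger.level_file()  # write the header and separator line
#         logger.write_line([100, 0.253812])
#         logger.write_line([200, 0.161204])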


def gen_line_str(
    data, widths=None, seperator=" | ", float_precision=4, minimum_cell_width=9
):
    """
    Generate a line in a nice, human-readable format, consisting of
    multiple spaced and separated cells.

    Parameters
    ----------
    data : tuple
        Strings or floats of what is in each cell. It must be in the
        same order and have the same length as the column names.
    widths : List or None
        Optional: The width of every cell. If None, will set it
        automatically, depending on the data. If widths is given, but
        what is given in data is wider than the width, the cell will
        expand without notice. Must have the same length as the
        column names.
    seperator : str
        String that separates two adjacent cells.
    float_precision : int
        Precision to which floats are rounded if they appear in data.
        The length of the resulting number can be up to 5 characters
        longer than this value (due to . and e-09).
    minimum_cell_width : int
        Minimum width of the cells in characters.

    Returns
    -------
    line : str
        The line.
    new_widths : List
        The widths of the cells.

    """
    cells, new_widths = gen_line_cells(
        data, widths, float_precision, minimum_cell_width
    )
    line = seperator.join(str(cell) for cell in cells)
    return line, new_widths


def gen_line_cells(data, widths=None, float_precision=4, minimum_cell_width=9):
    """
    Generate the content of the cells for a line in the summary file.

    See gen_line_str (above) for doc.

    Returns
    -------
    cells : List
    new_widths : List

    """
    if widths is None:
        new_widths = []
    else:
        new_widths = widths

    cells = []
    for i, entry in enumerate(data):
        # If entry is a number, round to given precision and make it a string
        if not isinstance(entry, str):
            entry = format(float(entry), "." + str(float_precision) + "g")

        if widths is None:
            cell_width = max(minimum_cell_width, len(entry))
            new_widths.append(cell_width)
        else:
            cell_width = widths[i]

        cell_cont = format(entry, "<" + str(cell_width))
        cells.append(cell_cont)

    return cells, new_widths
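

# How the two helpers above behave (illustration only): the first call
# derives the cell widths from the data, and passing those widths to later
# calls keeps all lines aligned. With float_precision=4, floats are
# formatted with ".4g".
#
#     line, widths = gen_line_str(["Epoch", "LR", "loss"])
#     # line   == 'Epoch     | LR        | loss     '
#     # widths == [9, 9, 9]
#     row, _ = gen_line_str([1.003, 0.001, 0.052134], widths=widths)
#     # row    == '1.003     | 0.001     | 0.05213  '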


class SummaryLogger:
    """
    For writing the summary logfile made during training.

    Parameters
    ----------
    orga : orcanet.core.Organizer
        Contains all the configurable options in the OrcaNet scripts.
    model : ks.models.Model or None
        Keras model containing the metrics to plot.

    """

    def __init__(self, orga, model):
        self.orga = orga
        self.model = model
        # Minimum width of the cells in characters.
        self.minimum_cell_width = 11
        # Precision to which floats are rounded if they appear in data.
        self.float_precision = 6

        self.logfile_name = orga.cfg.output_folder + "summary.txt"
        self.temp_filepath = orga.cfg.output_folder + "/.temp_summary.txt"

    def write_line(self, epoch_float, lr, history_train=None, history_val=None):
        """
        Write a line to the summary.txt file in the trained model folder.

        Will update an existing line if possible.

        Notes
        -----
        In tf 2.2, model.metrics_names is only filled after the model
        has been used on data, i.e. only after that point can this
        method be run. Otherwise, _get_column_names will throw a
        NameError.

        Parameters
        ----------
        epoch_float : float
            The current epoch and fileno as a float.
        lr : float/str
            The current learning rate of the model.
        history_train : dict
            Dict containing the history of the training, averaged over
            files.
            Keys: Metric names, e.g. "loss", "accuracy", ...
            Values: Value of the metric during training as a float.
        history_val : dict or None
            Dict of validation losses for all the metrics, averaged
            over all validation files.
            Keys: Metric names, e.g. "loss", "accuracy", ...
            Values: Value of the metric during validation as a float.

        """
        if history_val is None and history_train is None:
            raise ValueError(
                "Can not summary log when both train and val history are None"
            )

        widths = self._init_writing()

        # Format the content: (Epoch, LR, train_1, val_1, ...)
        data = [epoch_float, lr]
        for metric_name in self.model.metrics_names:
            if history_train is None:
                data.append("n/a")
            else:
                data.append(history_train[metric_name])
            if history_val is None:
                data.append("n/a")
            else:
                data.append(history_val[metric_name])

        # if the epoch is already in the file, its line will get updated
        update_line = False
        summary_data = self.orga.history.get_summary_data()
        if len(summary_data) > 0:
            last_line = summary_data[-1]
            # get epoch to same length as it appears in file
            # TODO this is bad; epoch and fileno should probably be in the
            #  summary.txt in their own columns
            data[0] = float(self._gen_line_cells(data, widths)[0][0])

            if last_line["Epoch"] == data[0]:
                # merge arrays but ignore LR
                data = merge_arrays(last_line, data, exclude=1)
                update_line = True

        line = self._gen_line_str(data, widths)[0]
        self._save_line(line, update_line)

    def _get_column_names(self):
        column_names = ["Epoch", "LR"]
        for metric_name in self.model.metrics_names:
            column_names.append("train_" + str(metric_name))
            column_names.append("val_" + str(metric_name))
        column_names = tuple(column_names)

        if os.path.isfile(self.logfile_name):
            # if summary exists already, check if model metrics match
            file_column_names = self.orga.history.get_column_names()
            if not set(column_names) == set(file_column_names):
                raise NameError(
                    "Can not log to summary: column names differ (from model: "
                    "{}, from summary file: {})".format(
                        column_names, file_column_names
                    )
                )
            column_names = file_column_names

        return column_names

    def _save_line(self, line, update=False):
        """Write a line in the summary file. If update, overwrite the last line."""
        if not update:
            with open(self.logfile_name, "a+") as logfile:
                logfile.write(line + "\n")
        else:
            # replace last line by rewriting the whole summary file :-(
            with open(self.logfile_name, "r") as old_file:
                lines = old_file.readlines()
            lines[-1] = line + "\n"
            # make new summary file as a temp
            with open(self.temp_filepath, "w") as temp_file:
                for old_line in lines:
                    temp_file.write(old_line)
            # Remove original file
            os.remove(self.logfile_name)
            # Move new file
            move(self.temp_filepath, self.logfile_name)

    def _init_writing(self):
        """
        Get the widths of the columns, and write the head if the file is new.

        The widths have the length of the metric names, but are at
        least self.minimum_cell_width.

        Returns
        -------
        widths : list
            The width of every cell in characters.

        """
        column_names = self._get_column_names()
        headline, widths = self._gen_line_str(column_names)
        if (
            not os.path.isfile(self.logfile_name)
            or os.stat(self.logfile_name).st_size == 0
        ):
            vline = ["-" * width for width in widths]
            vertical_line = self._gen_line_str(vline, widths, seperator="-+-")[0]
            with open(self.logfile_name, "a+") as logfile:
                logfile.write(headline + "\n")
                logfile.write(vertical_line + "\n")

        return widths

    def _gen_line_str(self, data, widths=None, seperator=" | "):
        line, widths = gen_line_str(
            data,
            widths=widths,
            seperator=seperator,
            float_precision=self.float_precision,
            minimum_cell_width=self.minimum_cell_width,
        )
        return line, widths

    def _gen_line_cells(self, data, widths=None):
        cells, widths = gen_line_cells(
            data,
            widths=widths,
            float_precision=self.float_precision,
            minimum_cell_width=self.minimum_cell_width,
        )
        return cells, widths
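

# Sketch of SummaryLogger usage (assumes an existing orcanet.core.Organizer
# instance `orga` and a compiled model that has already been used on data,
# so that model.metrics_names is filled; the history values are made up):
#
#     summary_logger = SummaryLogger(orga, model)
#     summary_logger.write_line(
#         epoch_float=1.25,
#         lr=0.001,
#         history_train={"loss": 0.52},
#         history_val={"loss": 0.61},
#     )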


def merge_arrays(base, supp, exclude=None):
    """
    Fill nans in a list with values from another list.

    Parameters
    ----------
    base : List
    supp : List
    exclude : List or int
        Which indices to ignore.

    Returns
    -------
    np.array

    """
    try:
        iter(exclude)
    except TypeError:
        exclude = [exclude]

    for i in range(len(base)):
        if exclude is not None and i in exclude:
            continue

        if base[i] == supp[i]:
            continue
        elif base[i] == "n/a" or np.isnan(base[i]):
            base[i] = supp[i]
        elif supp[i] == "n/a" or np.isnan(supp[i]):
            continue
        else:
            raise ValueError(
                "Cannot merge arrays at index {}: Base {}, supplement {}".format(
                    i, base[i], supp[i]
                )
            )
    return base
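

# Illustration of merge_arrays (not part of the original module): entries
# that are "n/a" or nan in `base` are filled from `supp`; index 1 (the LR
# column in the summary file) is excluded from the comparison.
#
#     base = [1.25, 0.001, 0.52, "n/a"]
#     supp = [1.25, 0.002, "n/a", 0.61]
#     merge_arrays(base, supp, exclude=1)
#     # -> [1.25, 0.001, 0.52, 0.61]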


class BatchLogger(ks.callbacks.Callback):
    """
    Write logfiles during training.

    Averages the losses of the model over some number of batches,
    and then writes that in a line in the logfile.
    The Batch_float entry in the logfiles gives the absolute position
    of the batch in the epoch (i.e. taking all files into account).
    This class is intended to be used only for one epoch = one file.

    Parameters
    ----------
    orga : orcanet.core.Organizer
        Contains all the configurable options in the OrcaNet scripts.
    epoch : tuple
        Epoch and file number.
    reset_metrics : bool
        Reset the internal state of the metrics after every batch?

    """

    def __init__(self, orga, epoch, reset_metrics=True):
        super().__init__()
        self.reset_metrics = reset_metrics

        self.epoch_number = epoch[0]
        self.f_number = epoch[1]

        # settings (read from orga)
        self.display = orga.cfg.train_logger_display
        self.flush = orga.cfg.train_logger_flush
        self.logfile_name = "{}/log_epoch_{}_file_{}.txt".format(
            orga.io.get_subfolder("train_log", create=True),
            self.epoch_number,
            self.f_number,
        )
        self.batchsize = orga.cfg.batchsize
        self.file_sizes = np.array(orga.io.get_file_sizes("train"))

        # get the total no of batches over all files (not just the current one)
        # This is for calculating the batch_float number in the logs
        file_batches = np.ceil(self.file_sizes / self.batchsize)
        self.total_batches = np.sum(file_batches)
        # no of batches seen in previous files
        if self.f_number == 1:
            self.previous_batches = 0.0
        elif self.f_number > 1:
            self.previous_batches = np.cumsum(file_batches)[self.f_number - 2]
        else:
            raise AssertionError("f_number not >= 1 ({})".format(self.f_number))

        self.seen = None
        self.lines = None
        self.cum_metrics = None
        self.file = None
        self.epoch_initialized = False
        self._stored_metrics = False
        self._logger = None

    def on_epoch_begin(self, epoch, logs=None):
        self.epoch_initialized = False

    def initialize_epoch(self):
        """Start a new logfile and prepare the logger."""
        # no of seen batches in this epoch
        self.seen = 0
        # list of stored lines, so that multiple can be written at once
        self.lines = []
        # store the various metrics to be able to average over multiple batches
        self.cum_metrics = {}
        for metric in self.model.metrics_names:
            self.cum_metrics[metric] = 0
        self.file = open(self.logfile_name, "w")
        self._write_head()

    def on_batch_end(self, batch, logs=None):
        # self.params:
        #   {'epochs': 1, 'steps': 50, 'verbose': 1, 'do_validation': False,
        #    'metrics': ['loss', 'dx_loss', 'dx_err_loss',
        #                'val_loss', 'val_dx_loss', 'val_dx_err_loss']}
        # logs:
        #   {'batch': 7, 'size': 5, 'loss': 2.06344,
        #    'dx_loss': 0.19809794, 'dx_err_loss': 0.08246058}
        logs = logs or {}

        if not self.epoch_initialized:
            self.initialize_epoch()
            self.epoch_initialized = True

        self.seen += 1
        for metric in self.model.metrics_names:
            self.cum_metrics[metric] += logs.get(metric)
        if not self._stored_metrics:
            self._stored_metrics = True

        if self.seen % self.display == 0:
            self._write_line()

        if self.flush != -1 and self.display % self.flush == 0:
            self._flush_file()

        if self.reset_metrics:
            self.model.reset_metrics()

    def on_epoch_end(self, batch, logs=None):
        # on_epoch_end here means that this is called after one fit_generator
        # loop in Keras is finished, so after one file in our case.
        # if self._stored_metrics:
        #     # write stats of remaining batches
        #     self._write_line()
        self.file.close()

    def _write_line(self):
        """Write a line with the metrics for the current status and reset them."""
        # The fraction is shifted by self.display / 2, so that it is in
        # the middle of the samples
        batch_frctn = (
            self.epoch_number
            - 1
            + (self.previous_batches + self.seen - self.display / 2.0)
            / self.total_batches
        )

        line_data = [self.seen, batch_frctn]
        for metric in self.model.metrics_names:
            line_data.append(self.cum_metrics[metric] / self.display)
            self.cum_metrics[metric] = 0
        self._logger.write_line(line_data)
        self._stored_metrics = False

    def _flush_file(self):
        self.file.flush()
        os.fsync(self.file.fileno())

    def _write_head(self):
        """Write column names for all losses / metrics."""
        column_names = ["Batch", "Batch_float"]
        for metric in self.model.metrics_names:
            column_names.append(metric)
        self._logger = TrainfileLogger(self.file, column_names)
        self._logger.level_file()
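

# Sketch of how BatchLogger is wired into training (presumably created by
# OrcaNet itself during a training run; `orga`, `model` and `train_data`
# are assumed to exist, and the epoch/file numbers are made up):
#
#     batch_logger = BatchLogger(orga, epoch=(1, 1))  # epoch 1, file 1
#     model.fit(train_data, callbacks=[batch_logger])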


def log_start_training(orga):
    """
    When a training is started for the first time, this logs all the
    input parameters to the log.txt file.

    Parameters
    ----------
    orga : orcanet.core.Organizer
        Contains all the configurable options in the OrcaNet scripts.

    """
    lines = []
    log = lines.append

    log("-" * 60)
    time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    log("-" * 19 + " {} ".format(time) + "-" * 19)
    log("-" * 10 + " orcanet version " + str(orcanet.__version__))
    log("\nTraining run started with the following configuration:\n")

    log("Output folder:\t" + orga.cfg.output_folder)
    log("List file path:\t" + orga.cfg.get_list_file() + "\n")

    log("Given trainfiles in the .list file:")
    for input_name, input_files in orga.cfg.get_files("train").items():
        log(" " + input_name + ":")
        for input_file in input_files:
            log("\t" + input_file)

    log("\nGiven validation files in the .list file:")
    for input_name, input_files in orga.cfg.get_files("val").items():
        log(" " + input_name + ":")
        for input_file in input_files:
            log("\t" + input_file)

    log("\nSettings used:")
    for key, value in vars(orga.cfg).items():
        if key == "output_folder" or key.startswith("_"):
            continue
        log(" {}:\t{}".format(key, value))

    log("")
    orga.io.print_log(lines)


def log_start_validation(orga):
    """Log filenames used for validation."""
    line = "Validation"
    orga.io.print_log(line)
    orga.io.print_log("-" * len(line))
    lines = ["Inputs and files:"]
    for input_name, input_files in orga.io.get_local_files("val").items():
        line = " " + input_name + ":\t"
        for i, input_file in enumerate(input_files):
            if i != 0:
                line += ", "
            line += os.path.basename(input_file)
        lines.append(line)
    orga.io.print_log(lines)

# class TensorBoardWrapper(ks.callbacks.TensorBoard): # """Up to now (05.10.17), Keras doesn't accept TensorBoard callbacks with validation data that is fed by a generator. # Supplying the validation data is needed for the histogram_freq > 1 argument in the TB callback. # Without a workaround, only scalar values (e.g. loss, accuracy) and the computational graph of the model can be saved. # # This class acts as a Wrapper for the ks.callbacks.TensorBoard class in such a way, # that the whole validation data is put into a single array by using the generator. # Then, the single array is used in the validation steps. This workaround is experimental!""" # def __init__(self, batch_gen, nb_steps, **kwargs): # super(TensorBoardWrapper, self).__init__(**kwargs) # self.batch_gen = batch_gen # The generator. # self.nb_steps = nb_steps # Number of times to call next() on the generator. # # def on_epoch_end(self, epoch, logs): # # Fill in the `validation_data` property. # # After it's filled in, the regular on_epoch_end method has access to the validation_data. # imgs, tags = None, None # for s in range(self.nb_steps): # ib, tb = next(self.batch_gen) # if imgs is None and tags is None: # imgs = np.zeros(((self.nb_steps * ib.shape[0],) + ib.shape[1:]), dtype=np.float32) # tags = np.zeros(((self.nb_steps * tb.shape[0],) + tb.shape[1:]), dtype=np.uint8) # imgs[s * ib.shape[0]:(s + 1) * ib.shape[0]] = ib # tags[s * tb.shape[0]:(s + 1) * tb.shape[0]] = tb # self.validation_data = [imgs, tags, np.ones(imgs.shape[0]), 0.0] # return super(TensorBoardWrapper, self).on_epoch_end(epoch, logs)