Source code for pgfinder.find_pg

#!/usr/bin/env python3
"""Run pgfinder at the command line."""

import argparse as arg
import ast
import importlib.resources as pkg_resources
import logging
import warnings
from pathlib import Path

import yaml

from pgfinder.errors import UserError
from pgfinder.logs.logs import LOGGER_NAME, setup_logger
from pgfinder.matching import data_analysis
from pgfinder.pgio import (
    dataframe_to_csv_metadata,
    default_filename,
    ms_file_reader,
    read_yaml,
    theo_masses_reader,
)
from pgfinder.utils import update_config

# Run the package's logger set-up for its side effects only (presumably it configures the logger registered under
# LOGGER_NAME -- confirm in pgfinder.logs.logs); the module-level LOGGER is then fetched by name.
setup_logger()
LOGGER = logging.getLogger(LOGGER_NAME)


def _str_to_bool(value: str) -> bool:
    """Convert a command line string such as ``"true"`` or ``"0"`` to a boolean.

    ``type=bool`` is unsuitable for argparse because ``bool("False")`` is ``True`` (every non-empty string is truthy).

    Parameters
    ----------
    value : str
        Raw string taken from the command line.

    Returns
    -------
    bool
        The boolean the string represents.

    Raises
    ------
    argparse.ArgumentTypeError
        If the string is not a recognised boolean spelling; argparse reports this as a usage error.
    """
    normalised = value.strip().lower()
    if normalised in {"true", "t", "yes", "y", "1", "on"}:
        return True
    # The empty string stays False because that is what the previous ``type=bool`` returned for it.
    if normalised in {"false", "f", "no", "n", "0", "off", ""}:
        return False
    raise arg.ArgumentTypeError(f"Expected a boolean value (e.g. true or false), got '{value}'.")


def create_parser() -> arg.ArgumentParser:
    """Create a parser for reading options.

    No option declares a default, so every option that is not supplied on the command line is ``None`` in the
    parsed namespace.

    Returns
    -------
    arg.ArgumentParser
        Parser configured with the options understood by the command line entry point.
    """
    parser = arg.ArgumentParser(
        description="Process sample. Additional arguments over-ride those in the configuration file."
    )
    parser.add_argument(
        "-c", "--config_file", dest="config_file", required=False, help="Path to a YAML configuration file."
    )
    parser.add_argument("--input_file", dest="input_file", required=False, help="Input File")
    parser.add_argument("--ppm_tolerance", dest="ppm_tolerance", type=float, required=False, help="PPM Tolerance.")
    parser.add_argument(
        "--consolidation_ppm",
        dest="consolidation_ppm",
        type=float,
        required=False,
        help="Maximum absolute ppm distance between consolidated structures.",
    )
    parser.add_argument("--masses_file", dest="masses_file", type=str, required=False, help="Theoretical masses file.")
    # Parsed as a float: process_file() defaults time_delta to 0.5, which ``type=int`` could never accept.
    parser.add_argument("--time_delta", dest="time_delta", type=float, required=False, help="Time delta.")
    # ast.literal_eval turns a Python literal given on the command line (e.g. "['a', 'b']") into an object.
    parser.add_argument(
        "--mod_list", dest="mod_list", type=ast.literal_eval, required=False, help="Modifications to include."
    )
    parser.add_argument("--output_dir", dest="output_dir", type=str, required=False, help="Output directory.")
    parser.add_argument("--warnings", dest="warnings", type=str, required=False, help="Whether to ignore warnings.")
    # ``--quiet`` on its own means True; ``--quiet <value>`` is still accepted and is parsed as a real boolean.
    parser.add_argument(
        "--quiet",
        dest="quiet",
        type=_str_to_bool,
        nargs="?",
        const=True,
        required=False,
        help="Suppress output.",
    )
    parser.add_argument(
        "--float_format", dest="float_format", type=int, required=False, help="Decimal places in output."
    )
    return parser
def process_file(
    input_file: str | Path,
    masses_file: str | Path,
    mod_list: list,
    ppm_tolerance: float = 10,
    consolidation_ppm: float = 1,
    time_delta: float = 0.5,
    output_dir: str | Path = "./",
    float_format: int = 4,
    to_csv: dict | None = None,
) -> None:
    """Process a mass spectrometry file and write the matched results to CSV.

    The input file is read with ``ms_file_reader()``, the masses file with ``theo_masses_reader()``, both are passed
    to ``data_analysis()`` and the result is written with ``dataframe_to_csv_metadata()`` using the name returned by
    ``default_filename()``.

    Parameters
    ----------
    input_file : str | Path
        Mass Spectrometry input file to process.
    masses_file : str | Path
        Input file of known masses.
    mod_list : list
        Modifications to include.
    ppm_tolerance : float
        Parts Per Million tolerance for matching.
    consolidation_ppm : float
        Passed to ``data_analysis()`` as ``consolidation_ppm``.
    time_delta : float
        Time difference, passed to ``data_analysis()`` as ``rt_window``.
    output_dir : str | Path
        Output directory where results are written to.
    float_format : int
        Decimal places to use in CSV files.
    to_csv : dict | None
        Accepted for backwards compatibility; this function does not currently use it.
    """
    source_path = Path(input_file)
    masses_path = Path(masses_file)
    output_dir = Path(output_dir)

    raw_data = ms_file_reader(source_path)
    theoretical_masses = theo_masses_reader(masses_path)

    LOGGER.info(f"PPM Tolerance : {ppm_tolerance}")
    LOGGER.info(f"Time Delta : {time_delta}")

    results = data_analysis(
        raw_data_df=raw_data,
        theo_masses_df=theoretical_masses,
        rt_window=time_delta,
        enabled_mod_list=mod_list,
        ppm_tolerance=ppm_tolerance,
        consolidation_ppm=consolidation_ppm,
    )
    LOGGER.info("Processing complete!")

    filename = default_filename()
    dataframe_to_csv_metadata(
        save_filepath=output_dir,
        output_dataframe=results,
        filename=filename,
        # float_format is a number of decimal places, e.g. 4 becomes the format string "%.4f".
        float_format=f"%.{float_format}f",
    )
    LOGGER.info(f"Results with metadata saved to : {output_dir}/{filename}")
def main():
    """Run processing.

    Parses the command line, loads either the YAML file given with ``--config_file`` or the ``default_config.yaml``
    packaged alongside this module, merges the command line options into it with ``update_config()``, applies the
    ``warnings`` and ``quiet`` settings and finally calls ``process_file()``.

    A ``UserError`` raised anywhere in that sequence is logged as an error instead of propagating.
    """
    try:
        # Parse command line options, load config and update with command line options
        parser = create_parser()
        args = parser.parse_args()
        if args.config_file is not None:
            config = read_yaml(args.config_file)
            LOGGER.info(f"Configuration file loaded from : {args.config_file}")
        else:
            # read_text() opens and closes the packaged resource itself, so no file handle is left open.
            default_config = pkg_resources.files(__package__).joinpath("default_config.yaml")
            config = yaml.safe_load(default_config.read_text(encoding="utf-8"))
            LOGGER.info("Default configuration file loaded.")
        config = update_config(config, args)

        # Optionally ignore all warnings or just show deprecation warnings
        if config["warnings"] == "ignore":
            warnings.filterwarnings("ignore")
            LOGGER.info("NB : All warnings have been turned off for this run.")
        elif config["warnings"] == "deprecated":
            # NOTE(review): catch_warnings() restores the previous filters when the ``with`` block exits, so this
            # branch emits and suppresses one DeprecationWarning and leaves the warning filters used for the rest
            # of the run unchanged. Confirm the intended behaviour before altering it.
            def fxn():
                warnings.warn("deprecated", DeprecationWarning, stacklevel=2)

            with warnings.catch_warnings():
                warnings.simplefilter("ignore")
                fxn()

        if config["quiet"]:
            LOGGER.setLevel("ERROR")

        process_file(
            input_file=config["input_file"],
            masses_file=config["masses_file"],
            ppm_tolerance=config["ppm_tolerance"],
            consolidation_ppm=config["consolidation_ppm"],
            time_delta=config["time_delta"],
            mod_list=config["mod_list"],
            output_dir=config["output_dir"],
            float_format=config["float_format"],
        )
    except UserError as e:
        # Avoid dumping a whole stack-trace if it's the user who's done something wrong
        LOGGER.error(e)
# Only run the command line entry point when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()