Source code for ms2pip.spectrum_input

"""Read MS2 spectra."""

from pathlib import Path
from typing import Generator

import numpy as np
from ms2rescore_rs import get_ms2_spectra

from ms2pip.exceptions import UnsupportedSpectrumFiletypeError
from ms2pip.spectrum import ObservedSpectrum


[docs]def read_spectrum_file(spectrum_file: str) -> Generator[ObservedSpectrum, None, None]: """ Read MS2 spectra from a supported file format; inferring the type from the filename extension. Parameters ---------- spectrum_file Path to MGF or mzML file. Yields ------ ObservedSpectrum Raises ------ UnsupportedSpectrumFiletypeError If the file extension is not supported. """ file_extension = Path(spectrum_file).suffix.lower() if file_extension not in [".mgf", ".mzml", ".d"] and not _is_minitdf(spectrum_file): raise UnsupportedSpectrumFiletypeError(file_extension) for spectrum in get_ms2_spectra(str(spectrum_file)): obs_spectrum = ObservedSpectrum( mz=np.array(spectrum.mz, dtype=np.float32), intensity=np.array(spectrum.intensity, dtype=np.float32), identifier=str(spectrum.identifier), precursor_mz=float(spectrum.precursor.mz), precursor_charge=float(spectrum.precursor.charge), retention_time=float(spectrum.precursor.rt), ) # Workaround for mobiusklein/mzdata#3 if ( obs_spectrum.identifier == "" or obs_spectrum.mz.shape[0] == 0 or obs_spectrum.intensity.shape[0] == 0 ): continue yield obs_spectrum
def _is_minitdf(spectrum_file: str) -> bool: """ Check if the spectrum file is a Bruker miniTDF folder. A Bruker miniTDF folder has no fixed name, but contains files matching the patterns ``*ms2spectrum.bin`` and ``*ms2spectrum.parquet``. """ files = set(Path(spectrum_file).glob("*ms2spectrum.bin")) files.update(Path(spectrum_file).glob("*ms2spectrum.parquet")) return len(files) >= 2