"""MS2 spectrum handling."""
from __future__ import annotations
import warnings
from typing import Any, Optional, Union
import numpy as np
from psm_utils import Peptidoform
from pydantic import model_validator, field_validator, ConfigDict, BaseModel
try:
import spectrum_utils.spectrum as sus
import spectrum_utils.plot as sup
except ImportError:
sus = None
sup = None
[docs]class Spectrum(BaseModel):
"""MS2 spectrum."""
mz: np.ndarray
intensity: np.ndarray
annotations: Optional[np.ndarray] = None
identifier: Optional[str] = None
peptidoform: Optional[Union[Peptidoform, str]] = None
precursor_mz: Optional[float] = None
precursor_charge: Optional[int] = None
retention_time: Optional[float] = None
mass_tolerance: Optional[float] = None
mass_tolerance_unit: Optional[str] = None
model_config = ConfigDict(arbitrary_types_allowed=True)
def __init__(__pydantic_self__, **data: Any) -> None:
"""
MS2 spectrum.
Parameters
----------
mz
Array of m/z values.
intensity
Array of intensity values.
annotations
Array of peak annotations.
identifier
Spectrum identifier.
peptidoform
Peptidoform.
precursor_mz
Precursor m/z.
precursor_charge
Precursor charge.
retention_time
Retention time.
mass_tolerance
Mass tolerance for spectrum annotation.
mass_tolerance_unit
Unit of mass tolerance for spectrum annotation.
"""
super().__init__(**data)
def __repr__(self) -> str:
return "{}.{}({})".format(
self.__class__.__module__,
self.__class__.__qualname__,
f"identifier='{self.identifier}'",
)
@model_validator(mode="after")
@classmethod
def check_array_lengths(cls, data: dict):
if len(data.mz) != len(data.intensity):
raise ValueError("Array lengths do not match.")
if data.annotations is not None:
if len(data.annotations) != len(data.intensity):
raise ValueError("Array lengths do not match.")
return data
@field_validator("peptidoform")
@classmethod
def check_peptidoform(cls, value):
if not value or isinstance(value, Peptidoform):
return value
elif isinstance(value, str):
return Peptidoform(value)
else:
raise ValueError("Peptidoform must be a string, a Peptidoform object, or None.")
@property
def tic(self):
"""Total ion current."""
return np.sum(self.intensity)
[docs] def remove_reporter_ions(self, label_type=None) -> None:
"""Set the intensity of reporter ions to 0."""
# TODO: Consider using the exact m/z values instead of a range.
if label_type == "iTRAQ":
for i, mz in enumerate(self.mz):
if (mz >= 113) & (mz <= 118):
self.intensity[i] = 0
# TMT6plex: 126.1277, 127.1311, 128.1344, 129.1378, 130.1411, 131.1382
elif label_type == "TMT":
for i, mz in enumerate(self.mz):
if (mz >= 125) & (mz <= 132):
self.intensity[i] = 0
[docs] def remove_precursor(self, tolerance=0.02) -> None:
"""Set the intensity of the precursor peak to 0."""
if not self.precursor_mz:
raise ValueError("Precursor m/z must be set.")
for i, mz in enumerate(self.mz):
if (mz >= self.precursor_mz - tolerance) & (mz <= self.precursor_mz + tolerance):
self.intensity[i] = 0
[docs] def tic_norm(self) -> None:
"""Normalize spectrum to total ion current."""
self.intensity = self.intensity / self.tic
[docs] def clip_intensity(self, min_intensity=0.0) -> None:
"""Clip intensity values."""
self.intensity = np.clip(self.intensity, min_intensity, None)
[docs] def to_spectrum_utils(self):
"""
Convert to spectrum_utils.spectrum.MsmsSpectrum.
Notes
-----
- Requires spectrum_utils to be installed.
- If the ``precursor_mz`` or ``precursor_charge`` attributes are not set, the theoretical
m/z and precursor charge of the ``peptidoform`` attribute are used, if present.
Otherwise, ``ValueError`` is raised.
"""
if not sus:
raise ImportError("Optional dependency spectrum_utils not installed.")
if self.precursor_charge:
precursor_charge = self.precursor_charge
else:
if not self.peptidoform:
raise ValueError("`precursor_charge` or `peptidoform` must be set.")
else:
precursor_charge = self.peptidoform.precursor_charge
if self.precursor_mz:
precursor_mz = self.precursor_mz
else:
if not self.peptidoform:
raise ValueError("`precursor_mz` or `peptidoform` must be set.")
else:
warnings.warn("precursor_mz not set, using theoretical precursor m/z.")
precursor_mz = self.peptidoform.theoretical_mz
spectrum = sus.MsmsSpectrum(
identifier=self.identifier if self.identifier else "spectrum",
precursor_mz=precursor_mz,
precursor_charge=precursor_charge,
mz=self.mz,
intensity=self.intensity,
retention_time=self.retention_time,
)
if self.peptidoform:
spectrum.annotate_proforma(
str(self.peptidoform), self.mass_tolerance, self.mass_tolerance_unit
)
return spectrum
[docs]class ObservedSpectrum(Spectrum):
"""Observed MS2 spectrum."""
pass
[docs]class PredictedSpectrum(Spectrum):
"""Predicted MS2 spectrum."""
mass_tolerance: Optional[float] = 0.001
mass_tolerance_unit: Optional[str] = "Da"
pass