"""Definition and handling of MS²PIP results."""
from __future__ import annotations
import csv
from typing import Any, Dict, List, Optional, Tuple
from logging import getLogger
import numpy as np
from psm_utils import PSM
from pydantic import BaseModel, ConfigDict
try:
import spectrum_utils.plot as sup
import spectrum_utils.spectrum as sus
except ImportError:
sus = None
sup = None
from ms2pip.spectrum import ObservedSpectrum, PredictedSpectrum
logger = getLogger(__name__)
[docs]class ProcessingResult(BaseModel):
"""Result of processing a single PSM."""
psm_index: int
psm: Optional[PSM] = None
theoretical_mz: Optional[Dict[str, np.ndarray]] = None
predicted_intensity: Optional[Dict[str, np.ndarray]] = None
observed_intensity: Optional[Dict[str, np.ndarray]] = None
correlation: Optional[float] = None
feature_vectors: Optional[np.ndarray] = None
model_config = ConfigDict(arbitrary_types_allowed=True)
def __init__(__pydantic_self__, **data: Any) -> None:
"""Result of processing a single PSM."""
super().__init__(**data)
[docs] def as_spectra(self) -> Tuple[Optional[PredictedSpectrum], Optional[ObservedSpectrum]]:
"""Convert result to predicted and observed spectra."""
if not self.theoretical_mz:
raise ValueError("Theoretical m/z values required to convert to spectra.")
mz = np.concatenate([i for i in self.theoretical_mz.values()])
annotations = np.concatenate(
[
[ion_type + str(i + 1) for i in range(len(peaks))]
for ion_type, peaks in self.theoretical_mz.items()
]
)
peak_order = np.argsort(mz)
if self.predicted_intensity:
pred_int = np.concatenate([i for i in self.predicted_intensity.values()])
pred_int = (2**pred_int) - 0.001 # Unlog intensities
predicted = PredictedSpectrum(
mz=mz[peak_order],
intensity=pred_int[peak_order],
annotations=annotations[peak_order],
peptidoform=self.psm.peptidoform if self.psm else None,
precursor_charge=self.psm.peptidoform.precursor_charge if self.psm else None,
)
else:
predicted = None
if self.observed_intensity:
obs_int = np.concatenate([i for i in self.observed_intensity.values()])
obs_int = (2**obs_int) - 0.001 # Unlog intensities
observed = ObservedSpectrum(
mz=mz[peak_order],
intensity=obs_int[peak_order],
annotations=annotations[peak_order],
peptidoform=self.psm.peptidoform if self.psm else None,
precursor_charge=self.psm.peptidoform.precursor_charge if self.psm else None,
)
else:
observed = None
return predicted, observed
[docs] def plot_spectra(self):
"""
Plot predicted and observed spectra.
Returns
-------
matplotlib.axes.Axes
Notes
-----
Requires optional dependency ``spectrum_utils`` to be installed.
"""
predicted, observed = (
spec.to_spectrum_utils() if spec else None for spec in self.as_spectra()
)
if predicted and observed:
ax = sup.mirror(observed, predicted)
ax.set_title(
f"Observed (top) and predicted (bottom) spectra for {self.psm.peptidoform}"
)
elif predicted:
ax = sup.spectrum(predicted)
ax.set_title(f"Predicted spectrum for {self.psm.peptidoform}")
elif observed:
ax = sup.spectrum(observed)
ax.set_title(f"Observed spectrum for {self.psm.peptidoform}")
else:
raise ValueError("No spectra to plot.")
return ax
[docs]def calculate_correlations(results: List[ProcessingResult]) -> None:
"""Calculate and add Pearson correlations to list of results."""
for result in results:
pred_int = np.concatenate([i for i in result.predicted_intensity.values()])
obs_int = np.concatenate([i for i in result.observed_intensity.values()])
result.correlation = np.corrcoef(pred_int, obs_int)[0][1]
[docs]def write_correlations(results: List["ProcessingResult"], output_file: str) -> None:
"""Write correlations to CSV file."""
with open(output_file, "wt") as f:
fieldnames = ["psm_index", "correlation"]
writer = csv.DictWriter(f, fieldnames=fieldnames, delimiter="\t", lineterminator="\n")
writer.writeheader()
for result in results:
writer.writerow({"psm_index": result.psm_index, "correlation": result.correlation})