NTS (Non-Target Screening) Module¶

Screening ¶

calc_overlaping_stats ¶

calc_overlaping_stats(motif, overlaping_fragments, overlaping_losses)

Calculates the number of overlapping features and their cumulative intensity.

Parameters:

Name	Type	Description	Default
`motif`		matchms spectrum object	required
`overlaping_fragments`	`list`	list of floats of mz values for fragments	required
`overlaping_losses`	`list`	list of float of mz values for losses	required

Returns:

Name	Type	Description
`n_overlaping_features`	`int`	number of features that did overlap between motif and query spectrum
`sum_overlaping_features_intensities`	`float`	consensus intensity for all features that overlap between motif and query spectrum

Source code in MS2LDA/Add_On/NTS/Screening.py

def calc_overlaping_stats(motif, overlaping_fragments, overlaping_losses):
    """counts the motif features shared with a query spectrum and sums their motif intensities

    ARGS:
        motif: matchms spectrum object; its peaks and losses are searched by exact mz value
        overlaping_fragments (list): fragment mz values, each has to occur in motif.peaks.mz
        overlaping_losses (list): loss mz values, each has to occur in motif.losses.mz

    RETURNS:
        n_overlaping_features (int): number of overlaping fragments plus overlaping losses
        sum_overlaping_features_intensities (float): sum of the motif intensities of all these features
    """

    def intensity_of(features, mz_value):
        # exact equality lookup; the first matching position is used and a
        # value that is absent raises an IndexError
        position = np.where(features.mz == mz_value)[0][0]
        return features.intensities[position]

    # motif.peaks / motif.losses are only touched when there is a value to look up
    fragment_intensities = [
        intensity_of(motif.peaks, mz_value) for mz_value in overlaping_fragments
    ]
    loss_intensities = [
        intensity_of(motif.losses, mz_value) for mz_value in overlaping_losses
    ]

    shared_intensities = fragment_intensities + loss_intensities
    return len(shared_intensities), np.sum(shared_intensities)

match_spectral_overlap ¶

match_spectral_overlap(
    spectrum_1, spectrum_2, margin_fragments=0.005, margin_losses=0.01
)

calculates the overlap of two spectra considering the fragments and losses

Parameters:

Name	Type	Description	Default
`spectrum_1`		matchms spectrum object	required
`spectrum_2`		matchms spectrum object	required
`margin_fragments`	`float`	margin of error for fragments for a match	`0.005`
`margin_losses`	`float`	margin of error for losses for a match	`0.01`

Returns:

Name	Type	Description
`overlaping_fragments`	`list`	list of overlapping m/z values of fragments
`overlaping_losses`	`list`	list of overlapping m/z values of losses
`pearson_score`	`float`	pearson score for spectra with more than 1 overlaping feature

Source code in MS2LDA/Add_On/NTS/Screening.py

def match_spectral_overlap(
    spectrum_1, spectrum_2, margin_fragments=0.005, margin_losses=0.01
):
    """calculates the overlap of two spectra considering the fragments and losses

    The losses of both spectra are compared; a spectrum whose ``losses``
    attribute is None is treated as having no losses at all.

    ARGS:
        spectrum_1: matchms spectrum object (the motif; reported mz values are taken from it)
        spectrum_2: matchms spectrum object (the query spectrum)
        margin_fragments (float): margin of error for fragments for a match
        margin_losses (float): margin of error for losses for a match

    RETURNS:
        overlaping_fragments (list): list for overlaping mz values of fragments
        overlaping_losses (list): list for overlaping mz values of losses
        pearson_score (float): pearson score for spectra with more than 1 overlaping feature, otherwise 0
    """

    def find_overlap(arr1, arr2, margin):
        """finds the overlap of two arrays

        ARGS:
            arr1 (np.array): array of floats
            arr2 (np.array): array of floats; has to be sorted ascending for the early exit to be valid
            margin (float): margin of error

        RETURNS:
            overlap (list): overlaping float values (taken from arr1) within the error margin
            idx_spectrum_1_intensities (list): indices of overlaping features in spectrum 1
            idx_spectrum_2_intensities (list): indices of overlaping features in spectrum 2
        """
        overlap = []
        idx_spectrum_1_intensities = []
        idx_spectrum_2_intensities = []
        for i, value_1 in enumerate(arr1):
            for j, value_2 in enumerate(arr2):
                if abs(value_1 - value_2) <= margin:
                    overlap.append(value_1)  # because arr1 is the motif spectrum
                    idx_spectrum_1_intensities.append(i)
                    idx_spectrum_2_intensities.append(j)
                    break  # only the first partner within the margin is used
                if value_1 < value_2:
                    # arr2 is already past value_1, later entries cannot match
                    break

        return overlap, idx_spectrum_1_intensities, idx_spectrum_2_intensities

    def mz_and_intensities(features):
        """returns the mz and intensity arrays of a feature set, empty arrays if it is None"""
        if features is None:
            return np.array([], dtype=float), np.array([], dtype=float)
        return features.mz, features.intensities

    # spectrum 1 fragments and losses
    spectrum_1_fragments_mz = spectrum_1.peaks.mz
    spectrum_1_fragments_intensities = spectrum_1.peaks.intensities
    spectrum_1_losses_mz, spectrum_1_losses_intensities = mz_and_intensities(
        spectrum_1.losses
    )

    # spectrum 2 fragments and losses; missing losses become empty arrays
    # instead of being replaced by a dummy value that can never match
    spectrum_2_fragments_mz = spectrum_2.peaks.mz
    spectrum_2_fragments_intensities = spectrum_2.peaks.intensities
    spectrum_2_losses_mz, spectrum_2_losses_intensities = mz_and_intensities(
        spectrum_2.losses
    )

    # find overlaping features
    overlaping_fragments, idx_fragments_1_intensities, idx_fragments_2_intensities = (
        find_overlap(spectrum_1_fragments_mz, spectrum_2_fragments_mz, margin_fragments)
    )
    overlaping_losses, idx_losses_1_intensities, idx_losses_2_intensities = (
        find_overlap(spectrum_1_losses_mz, spectrum_2_losses_mz, margin_losses)
    )

    # pearson score for intensity trend (fragments first, then losses, pairwise aligned)
    intensities_spectrum_1 = list(
        spectrum_1_fragments_intensities[idx_fragments_1_intensities]
    ) + list(spectrum_1_losses_intensities[idx_losses_1_intensities])
    intensities_spectrum_2 = list(
        spectrum_2_fragments_intensities[idx_fragments_2_intensities]
    ) + list(spectrum_2_losses_intensities[idx_losses_2_intensities])

    # a correlation needs at least two paired values
    if len(intensities_spectrum_1) >= 2:
        pearson_score = calc_pearson(intensities_spectrum_1, intensities_spectrum_2)[0]
    else:
        pearson_score = 0

    return overlaping_fragments, overlaping_losses, pearson_score

run_screen ¶

run_screen(motif, spectra)

runs the screening for a given set of spectra against a motif spectrum

Parameters:

Name	Type	Description	Default
`motif`		matchms spectrum object	required
`spectra`	`list`	list of matchms spectrum objects	required

Returns:

Type	Description
	A, B, C, D (list): lists of matchms spectrum objects; A holds the best matches, D the worst matches

Source code in MS2LDA/Add_On/NTS/Screening.py

def run_screen(motif, spectra):
    """screens every spectrum against the motif spectrum and sorts it into a match category

    ARGS:
        motif: matchms spectrum object
        spectra (list): list of matchms spectrum objects

    RETURNS:
        A,B,C,D (list): list of matchms spectra objects; A are the best matches D the worst matches
    """

    buckets = {"A": [], "B": [], "C": [], "D": []}

    for candidate in spectra:
        # which motif features does the candidate share, and how well do intensities correlate
        shared_fragments, shared_losses, pearson_score = match_spectral_overlap(
            motif, candidate
        )
        n_shared, summed_intensity = calc_overlaping_stats(
            motif, shared_fragments, shared_losses
        )
        category = screen_score(n_shared, summed_intensity, pearson_score)

        # spectra without a category (screen_score returned None) are dropped
        bucket = buckets.get(category)
        if bucket is not None:
            bucket.append(candidate)

    return buckets["A"], buckets["B"], buckets["C"], buckets["D"]

save_as_csv ¶

save_as_csv(spectra, smiles)

saves information about the matching spectra in comparison to the annotated compound/spectrum

Source code in MS2LDA/Add_On/NTS/Screening.py

def save_as_csv(spectra, smiles, output_path="results.csv"):
    """saves information about the matching spectra in comparison to the annotated compound/spectrum

    For every SMILES a ``mass_diff_<i>`` and a ``formula_diff_<i>`` column is
    added; the table also holds the precursor mz and retention time of every
    spectrum. The table is written as csv and returned.

    ARGS:
        spectra (list): spectrum objects providing get("precursor_mz") and get("retention_time")
        smiles (list): SMILES strings of the annotated compounds; a falsy entry yields placeholder columns
        output_path (str): location of the csv file that is written

    RETURNS:
        results (pd.DataFrame): one row per spectrum with the columns described above
    """

    smiles = list(smiles)  # allow any iterable, it is only walked once below

    # spectrum metadata does not depend on the SMILES, so collect it once;
    # this also keeps the columns available when no SMILES is given at all
    precursor_mzs = [spectrum.get("precursor_mz") for spectrum in spectra]
    retention_times = [spectrum.get("retention_time") for spectrum in spectra]

    results = pd.DataFrame({"id": list(range(len(spectra)))})

    engine = None  # Msbuddy is only built once a formula lookup is really needed

    for i, smi in enumerate(smiles):
        if smi and precursor_mzs:
            if engine is None:
                engine = Msbuddy()

            # the weight of the annotated compound is the same for every spectrum
            precursor_mz_motif_mol = ExactMolWt(Chem.MolFromSmiles(smi))

            mass_diffs = []
            formula_diffs = []
            for precursor_mz in precursor_mzs:
                # NOTE(review): 1.007276470 is presumably the proton mass, i.e. the
                # precursor is assumed to be an [M+H]+ ion - confirm
                mass_diff = abs(precursor_mz - 1.007276470 - precursor_mz_motif_mol)

                formula_list = engine.mass_to_formula(mass_diff, 0.01, False)
                formula_diff = formula_list[0].formula if formula_list else "NaN"

                mass_diffs.append(mass_diff)
                formula_diffs.append(formula_diff)
        else:
            # no SMILES (or no spectra): keep the columns aligned with the rows
            mass_diffs = [float("nan")] * len(precursor_mzs)
            formula_diffs = ["NaN"] * len(precursor_mzs)

        results[f"mass_diff_{i}"] = mass_diffs
        results[f"formula_diff_{i}"] = formula_diffs

    results["precursor_mz"] = precursor_mzs
    results["retention_time"] = retention_times

    results.to_csv(output_path)

    return results

screen_score ¶

screen_score(
    n_overlaping_features, sum_overlaping_features_intensities, pearson_score
)

Based on the degree of overlap between the query spectrum and the motif spectrum, the spectrum is assigned to a certain level of similarity.

Parameters:

Name	Type	Description	Default
`n_overlaping_features`	`int`	number of features that did overlap between motif and query spectrum	required
`sum_overlaping_features_intensities`	`float`	consensus intensity for all features that overlap between motif and query spectrum	required

Returns:

Type	Description
`character`	A, B, C or D if it fits one of the categories else None

Source code in MS2LDA/Add_On/NTS/Screening.py

def screen_score(
    n_overlaping_features, sum_overlaping_features_intensities, pearson_score
):
    """assigns a similarity category to a query spectrum based on its overlap with the motif spectrum

    The categories are checked from best to worst and the first one whose
    thresholds are all reached is returned.

    ARGS:
        n_overlaping_features (int): number of features that did overlap between motif and query spectrum
        sum_overlaping_features_intensities (float): consensus intensity for all features that overlap between motif and query spectrum
        pearson_score (float): pearson score of the overlaping intensities; not used for category D

    RETURNS:
        (character): A, B, C or D if it fits one of the categories else None
    """

    # (category, min. number of features, min. summed intensity, min. pearson score)
    scored_tiers = (
        ("A", 3, 0.9, 0.7),
        ("B", 3, 0.9, 0.5),
        ("C", 2, 0.5, 0.3),
    )
    for category, min_features, min_intensity, min_pearson in scored_tiers:
        if (
            n_overlaping_features >= min_features
            and sum_overlaping_features_intensities >= min_intensity
            and pearson_score >= min_pearson
        ):
            return category

    # last resort: enough shared intensity, whatever the pearson score is
    if n_overlaping_features >= 2 and sum_overlaping_features_intensities >= 0.8:
        return "D"

    return None