Skip to content

NTS (Non-Target Screening) Module

Screening

calc_overlaping_stats

calc_overlaping_stats(motif, overlaping_fragments, overlaping_losses)

Calculates the number of overlapping features and their cumulative intensity.

Parameters:

Name Type Description Default
motif

matchms spectrum object

required
overlaping_fragments list

list of floats of mz values for fragments

required
overlaping_losses list

list of floats of mz values for losses

required

Returns:

Name Type Description
n_overlaping_features int

number of features that did overlap between motif and query spectrum

sum_overlaping_features_intensities float

consensus intensity for all features that overlap between motif and query spectrum

Source code in MS2LDA/Add_On/NTS/Screening.py
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
def calc_overlaping_stats(motif, overlaping_fragments, overlaping_losses):
    """counts the overlapping features and sums up their motif intensities

    Every given mz value is looked up by exact equality in the motif
    (fragments in motif.peaks, losses in motif.losses) and the intensity at
    the first matching position is collected.

    ARGS:
        motif: matchms spectrum object
        overlaping_fragments (list): list of floats of mz values for fragments
        overlaping_losses (list): list of floats of mz values for losses

    RETURNS:
        n_overlaping_features (int): number of features that did overlap between motif and query spectrum
        sum_overlaping_features_intensities (float): summed motif intensity of all features that overlap between motif and query spectrum
    """

    # motif intensity of every overlapping fragment (first exact mz hit)
    fragment_intensities = [
        motif.peaks.intensities[np.where(motif.peaks.mz == fragment_mz)[0][0]]
        for fragment_mz in overlaping_fragments
    ]

    # motif intensity of every overlapping loss (first exact mz hit)
    loss_intensities = [
        motif.losses.intensities[np.where(motif.losses.mz == loss_mz)[0][0]]
        for loss_mz in overlaping_losses
    ]

    # fragments and losses are scored together as one pool of features
    feature_intensities = fragment_intensities + loss_intensities

    return len(feature_intensities), np.sum(feature_intensities)

match_spectral_overlap

match_spectral_overlap(
    spectrum_1, spectrum_2, margin_fragments=0.005, margin_losses=0.01
)

calculates the overlap of two spectra considering the fragments and losses

Parameters:

Name Type Description Default
spectrum_1

matchms spectrum object

required
spectrum_2

matchms spectrum object

required
margin_fragments float

margin of error for fragments for a match

0.005
margin_losses float

margin of error for losses for a match

0.01

Returns:

Name Type Description
overlaping_fragments list

list for overlaping mz values of fragments

overlaping_losses list

list for overlaping mz values of losses

pearson_score float

Pearson score of the intensities of the overlapping features; 0 if fewer than 2 features overlap

Source code in MS2LDA/Add_On/NTS/Screening.py
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
def match_spectral_overlap(
    spectrum_1, spectrum_2, margin_fragments=0.005, margin_losses=0.01
):
    """determines which fragments and losses of spectrum_1 also occur in spectrum_2

    ARGS:
        spectrum_1: matchms spectrum object (treated as the motif spectrum)
        spectrum_2: matchms spectrum object
        margin_fragments (float): margin of error for fragments for a match
        margin_losses (float): margin of error for losses for a match

    RETURNS:
        overlaping_fragments (list): mz values (taken from spectrum_1) of overlapping fragments
        overlaping_losses (list): mz values (taken from spectrum_1) of overlapping losses
        pearson_score (float): pearson score of the matched intensities; 0 if fewer than 2 features overlap
    """

    def find_overlap(arr1, arr2, margin):
        """pairs every value of arr1 with the first value of arr2 within the margin

        ARGS:
            arr1 (np.array): array of floats
            arr2 (np.array): array of floats
            margin (float): margin of error

        RETURNS:
            overlap (list): values of arr1 that found a partner in arr2
            idx_spectrum_1_intensities (list): indices of overlapping features in arr1
            idx_spectrum_2_intensities (list): indices of overlapping features in arr2
        """
        matched_values = []
        matched_positions_1 = []
        matched_positions_2 = []
        for position_1, value_1 in enumerate(arr1):
            for position_2, value_2 in enumerate(arr2):
                if abs(value_1 - value_2) <= margin:
                    # the reported mz is the one from arr1, i.e. the motif spectrum
                    matched_values.append(value_1)
                    matched_positions_1.append(position_1)
                    matched_positions_2.append(position_2)
                    break
                if value_1 < value_2:
                    # stop scanning once arr2 has passed value_1
                    # (presumably relies on arr2 being sorted ascending -- confirm)
                    break

        return matched_values, matched_positions_1, matched_positions_2

    # features of spectrum 1 (fragments and losses)
    peaks_1 = spectrum_1.peaks
    losses_1 = spectrum_1.losses

    # features of spectrum 2: only the fragments are read from the spectrum
    peaks_2 = spectrum_2.peaks
    # NOTE: the losses of spectrum 2 are not used; a single zero placeholder
    # stands in for them (open question in the code: what if there are no losses?)
    placeholder_losses_mz = np.array([0])
    placeholder_losses_intensities = np.array([0])

    # overlapping features
    overlaping_fragments, fragment_positions_1, fragment_positions_2 = find_overlap(
        peaks_1.mz, peaks_2.mz, margin_fragments
    )
    overlaping_losses, loss_positions_1, loss_positions_2 = find_overlap(
        losses_1.mz, placeholder_losses_mz, margin_losses
    )

    # matched intensities of both spectra, fragments first, then losses
    matched_intensities_1 = [
        *peaks_1.intensities[fragment_positions_1],
        *losses_1.intensities[loss_positions_1],
    ]
    matched_intensities_2 = [
        *peaks_2.intensities[fragment_positions_2],
        *placeholder_losses_intensities[loss_positions_2],
    ]

    # an intensity trend needs at least two matched features
    pearson_score = (
        calc_pearson(matched_intensities_1, matched_intensities_2)[0]
        if len(matched_intensities_1) >= 2
        else 0
    )

    return overlaping_fragments, overlaping_losses, pearson_score

run_screen

run_screen(motif, spectra)

runs the screening for a given set of spectra against a motif spectrum

Parameters:

Name Type Description Default
motif

matchms spectrum object

required
spectra list

list of matchms spectrum objects

required

Returns:

Type Description

A,B,C,D (list): list of matchms spectra objects; A are the best matches D the worst matches

Source code in MS2LDA/Add_On/NTS/Screening.py
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
def run_screen(motif, spectra):
    """screens a set of spectra against a motif spectrum and bins them by match quality

    Each spectrum is compared with the motif (match_spectral_overlap), the
    overlap is summarised (calc_overlaping_stats) and the summary is turned
    into a category (screen_score). Spectra without a category are dropped.

    ARGS:
        motif: matchms spectrum object
        spectra (list): list of matchms spectrum objects

    RETURNS:
        A,B,C,D (list): list of matchms spectra objects; A are the best matches D the worst matches
    """

    A, B, C, D = [], [], [], []
    bins_by_category = {"A": A, "B": B, "C": C, "D": D}

    for spectrum in spectra:
        fragments, losses, pearson_score = match_spectral_overlap(motif, spectrum)
        n_features, summed_intensity = calc_overlaping_stats(motif, fragments, losses)
        category = screen_score(n_features, summed_intensity, pearson_score)

        # screen_score yields None when no category applies -> spectrum is skipped
        target_bin = bins_by_category.get(category)
        if target_bin is not None:
            target_bin.append(spectrum)

    return A, B, C, D

save_as_csv

save_as_csv(spectra, smiles)

saves information about the matching spectra in comparison to the annotated compound/spectrum

Source code in MS2LDA/Add_On/NTS/Screening.py
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
def save_as_csv(spectra, smiles, output_path="results.csv"):
    """saves information about the matching spectra in comparison to the annotated compound/spectrum

    For every SMILES a pair of columns is added: the absolute mass difference
    between each spectrum's precursor and the SMILES molecule, and the first
    formula candidate Msbuddy proposes for that mass difference.

    ARGS:
        spectra (list): list of matchms spectrum objects; "precursor_mz" and "retention_time" are read from their metadata
        smiles (list): list of SMILES strings of the annotated compounds; falsy entries (None or "") produce placeholder columns
        output_path (str): path of the csv file that is written; defaults to "results.csv"

    RETURNS:
        results (pd.DataFrame): one row per spectrum with the columns id, mass_diff_<i> and formula_diff_<i> for every SMILES, precursor_mz and retention_time
    """

    # The spectrum metadata does not depend on the SMILES, so it is collected
    # once up front. This also keeps the function working for an empty smiles list.
    precursor_mzs = [spectrum.get("precursor_mz") for spectrum in spectra]
    retention_times = [spectrum.get("retention_time") for spectrum in spectra]

    results = pd.DataFrame({"id": list(range(len(spectra)))})

    engine = Msbuddy()

    for i, smi in enumerate(smiles):
        if smi:
            # the molecule mass is the same for all spectra -> computed once per SMILES
            precursor_mz_motif_mol = ExactMolWt(Chem.MolFromSmiles(smi))

            mass_diffs = []
            formula_diffs = []
            for precursor_mz in precursor_mzs:
                # 1.007276470 is presumably the proton mass, i.e. the precursor
                # is treated as [M+H]+ -- confirm for other adducts
                mass_diff = abs(precursor_mz - 1.007276470 - precursor_mz_motif_mol)

                # NOTE(review): arguments are presumably (mass, tolerance, ppm flag)
                formula_list = engine.mass_to_formula(mass_diff, 0.01, False)
                formula_diff = formula_list[0].formula if formula_list else "NaN"

                mass_diffs.append(mass_diff)
                formula_diffs.append(formula_diff)
        else:
            # No SMILES available: fill with placeholders so the columns keep
            # one entry per spectrum (an empty column cannot be assigned).
            mass_diffs = [float("nan")] * len(spectra)
            formula_diffs = ["NaN"] * len(spectra)

        results[f"mass_diff_{i}"] = mass_diffs
        results[f"formula_diff_{i}"] = formula_diffs

    results["precursor_mz"] = precursor_mzs
    results["retention_time"] = retention_times

    results.to_csv(output_path)

    return results

screen_score

screen_score(
    n_overlaping_features, sum_overlaping_features_intensities, pearson_score
)

Based on the degree of overlap between the query spectrum and the motif spectrum, the query spectrum is assigned to a certain level of similarity.

Parameters:

Name Type Description Default
n_overlaping_features int

number of features that did overlap between motif and query spectrum

required
sum_overlaping_features_intensities float

consensus intensity for all features that overlap between motif and query spectrum

required

Returns:

Type Description
character

A, B, C or D if it fits one of the categories else None

Source code in MS2LDA/Add_On/NTS/Screening.py
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
def screen_score(
    n_overlaping_features, sum_overlaping_features_intensities, pearson_score
):
    """assigns a similarity level to a query spectrum based on its overlap with the motif spectrum

    ARGS:
        n_overlaping_features (int): number of features that did overlap between motif and query spectrum
        sum_overlaping_features_intensities (float): consensus intensity for all features that overlap between motif and query spectrum
        pearson_score (float): pearson score of the intensities of the overlapping features

    RETURNS:
        (character): A, B, C or D if it fits one of the categories else None
    """

    # A and B share the overlap requirement and differ only in the pearson cut-off
    strong_overlap = (
        n_overlaping_features >= 3 and sum_overlaping_features_intensities >= 0.9
    )
    if strong_overlap and pearson_score >= 0.7:
        return "A"
    if strong_overlap and pearson_score >= 0.5:
        return "B"

    # C and D both need at least two overlapping features
    if n_overlaping_features >= 2:
        if sum_overlaping_features_intensities >= 0.5 and pearson_score >= 0.3:
            return "C"
        # D does not look at the pearson score at all
        if sum_overlaping_features_intensities >= 0.8:
            return "D"

    return None