Skip to content

Spec2Vec Module

Annotation

annotation

calc_embeddings

calc_embeddings(s2v_similarity, spectra)

Calculate spectral embeddings for a list of spectra.

Source code in MS2LDA/Add_On/Spec2Vec/annotation.py
21
22
23
24
25
26
27
def calc_embeddings(s2v_similarity, spectra):
    """Compute one embedding per spectrum and stack them into an array.

    ARGS:
        s2v_similarity: object exposing ``_calculate_embedding`` (presumably the Spec2Vec similarity object built by ``load_s2v_model`` -- confirm against callers)
        spectra (iterable): spectra; each one is wrapped in a ``Mass2MotifDocument`` before it is embedded

    RETURNS:
        spectral_embeddings (np.array): array built from the per-spectrum embedding arrays, in input order
    """
    spectral_embeddings = []
    for spectrum in spectra:
        document = Mass2MotifDocument(spectrum)
        embedding = s2v_similarity._calculate_embedding(document)
        spectral_embeddings.append(np.array(embedding))
    return np.array(spectral_embeddings)

calc_similarity_faiss

calc_similarity_faiss(embeddings_A, embeddings_B, k=None)

Calculate cosine similarity using Faiss, ensuring proper normalization.

Source code in MS2LDA/Add_On/Spec2Vec/annotation.py
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
def calc_similarity_faiss(embeddings_A, embeddings_B, k=None):
    """Search the most similar reference embeddings for every query with Faiss.

    Both inputs are cast to float32 and passed through ``normalize_embeddings``;
    the references are then added to an inner-product index that is searched
    with the queries.

    ARGS:
        embeddings_A (np.array): query embeddings, one row per query
        embeddings_B (np.array): reference embeddings, one row per reference
        k (int; default = None): number of hits per query; None means all references

    RETURNS:
        similarities (np.array): scores returned by the index search
        indices (np.array): positions in embeddings_B that belong to the scores
    """
    # Faiss works on float32; astype also copies, so the caller's arrays are untouched
    queries = normalize_embeddings(embeddings_A.astype(np.float32))
    references = normalize_embeddings(embeddings_B.astype(np.float32))

    # Inner product (IP) index over the normalized references
    index = faiss.IndexFlatIP(references.shape[1])
    index.add(references)

    # Without an explicit k every reference is ranked
    n_hits = references.shape[0] if k is None else k

    return index.search(queries, n_hits)

get_library_matches

get_library_matches(similarities, indices, db_path, top_n=10, unique_mols=True)

Return similarity scores, SMILES, and spectra for top n matches for all motifs.

Source code in MS2LDA/Add_On/Spec2Vec/annotation.py
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
def get_library_matches(similarities, indices, db_path, top_n=10, unique_mols=True):
    """Collect the top library matches for every motif.

    ARGS:
        similarities (np.array): similarity scores; the first axis enumerates the motifs
        indices (np.array): database spectrum ids belonging to the scores
        db_path (str): path of the spectra database, forwarded unchanged
        top_n (int; default = 10): number of matches requested per motif
        unique_mols (bool; default = True): forwarded to ``get_library_matches_per_motif``

    RETURNS:
        library_matches (list): one ``get_library_matches_per_motif`` result per motif, in motif order
    """
    library_matches = []
    # tqdm only adds a progress bar; motifs are processed in order
    for motif_number in tqdm(range(similarities.shape[0])):
        matches = get_library_matches_per_motif(
            similarities,
            indices,
            db_path,
            motif_number=motif_number,
            top_n=top_n,
            unique_mols=unique_mols,
        )
        library_matches.append(matches)
    return library_matches

get_library_matches_per_motif

get_library_matches_per_motif(
    similarities, indices, db_path, motif_number=0, top_n=10, unique_mols=True
)

Return similarity scores, SMILES, and spectra for top n matches for one motif.

Source code in MS2LDA/Add_On/Spec2Vec/annotation.py
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
def get_library_matches_per_motif(
    similarities, indices, db_path, motif_number=0, top_n=10, unique_mols=True
):
    """Return similarity scores, SMILES, and spectra for top n matches for one motif.

    Candidates are visited in the column order of ``indices`` until ``top_n``
    matches are collected or the candidates run out. Candidates whose spectrum
    is missing from the database are skipped.

    ARGS:
        similarities (np.array): 2D similarity scores, indexed [motif_number, rank]
        indices (np.array): 2D database spectrum ids with the same layout as similarities
        db_path (str): path handed to ``load_spectrum_from_db``
        motif_number (int; default = 0): row of the motif to collect matches for
        top_n (int; default = 10): maximum number of matches to return
        unique_mols (bool; default = True): keep only the first hit per InChIKey;
            hits whose SMILES cannot be parsed are skipped because their identity
            cannot be established

    RETURNS:
        top_smiles (list): SMILES of the accepted matches
        top_spectra (list): spectra of the accepted matches
        top_scores (list): similarity scores of the accepted matches
    """
    top_smiles = []
    top_spectra = []
    top_scores = []
    seen_inchikeys = set()  # only membership matters here; hit order lives in the lists

    for rank in range(indices.shape[1]):
        if len(top_smiles) >= top_n:
            break  # requested number of molecules collected

        score = similarities[motif_number, rank]
        spectrum_data = load_spectrum_from_db(db_path, indices[motif_number, rank])
        if spectrum_data is None:
            continue  # Skip missing data

        smi = spectrum_data["smiles"]

        if unique_mols:
            mol = MolFromSmiles(smi)
            if mol is None:
                # RDKit signals an unparsable SMILES with None; MolToInchi(None) would raise
                continue
            inchikey = InchiToInchiKey(MolToInchi(mol))
            if inchikey in seen_inchikeys:
                continue  # Skip duplicates
            seen_inchikeys.add(inchikey)

        # Add the molecule to the results
        top_scores.append(score)
        top_smiles.append(smi)
        top_spectra.append(spectrum_data["spectrum"])

    return top_smiles, top_spectra, top_scores

load_s2v_model

load_s2v_model(path_model)

Load Spec2Vec model.

Source code in MS2LDA/Add_On/Spec2Vec/annotation.py
12
13
14
15
16
17
18
def load_s2v_model(path_model):
    """Load a Word2Vec model from disk and wrap it in a Spec2Vec similarity object.

    ARGS:
        path_model (str): path handed to ``Word2Vec.load``

    RETURNS:
        s2v_similarity: ``Spec2Vec`` object created with intensity_weighting_power=0.5 and allowed_missing_percentage=100.0
    """
    word2vec_model = Word2Vec.load(path_model)
    return Spec2Vec(
        model=word2vec_model,
        intensity_weighting_power=0.5,
        allowed_missing_percentage=100.0,
    )

load_spectrum_from_db

load_spectrum_from_db(db_path, spectrum_id)

Load a spectrum from the SQLite database by ID.

Source code in MS2LDA/Add_On/Spec2Vec/annotation.py
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
def load_spectrum_from_db(db_path, spectrum_id):
    """Load a spectrum from the SQLite database by ID.

    NOTE: the stored spectrum is unpickled. ``pickle.loads`` can execute
    arbitrary code, so only open databases that come from a trusted source.

    ARGS:
        db_path (str): path of the SQLite database containing a ``spectra`` table
        spectrum_id (int): value of the ``id`` column; converted with ``int()`` so numpy integers are accepted

    RETURNS:
        spectrum_data (dict or None): {"smiles": ..., "spectrum": ...} for the matching row, or None if no row has this id
    """
    from contextlib import closing

    # closing() guarantees the connection is released even if the query raises
    with closing(sqlite3.connect(db_path)) as conn:
        row = conn.execute(
            """
            SELECT smiles, spectrum FROM spectra WHERE id = ?
            """,
            (int(spectrum_id),),
        ).fetchone()

    if row is None:
        return None

    smiles, spectrum_blob = row
    return {"smiles": smiles, "spectrum": pickle.loads(spectrum_blob)}

normalize_embeddings

normalize_embeddings(embeddings)

Normalize embeddings safely to avoid overflow and NaN issues.

Source code in MS2LDA/Add_On/Spec2Vec/annotation.py
30
31
32
33
34
35
36
37
38
def normalize_embeddings(embeddings):
    """Scale every row to unit L2 norm without turning all-zero rows into NaN.

    NaN entries are first replaced via ``np.nan_to_num``. Rows whose norm is
    zero are divided by 1 instead, so they are returned as all-zero rows
    rather than NaN. The input array is not modified.

    ARGS:
        embeddings (np.array): 2D array with one embedding per row

    RETURNS:
        embeddings (np.array): new array with the normalized rows
    """
    embeddings = np.nan_to_num(embeddings)

    norms = np.linalg.norm(embeddings, axis=1, keepdims=True)
    norms[norms == 0] = 1  # Prevent division by zero

    # Out-of-place division also works for integer input, where `/=` would fail
    return embeddings / norms

Refined Annotation

annotation_refined

calc_similarity

calc_similarity(embeddings_A, embeddings_B)

calculates the cosine similarity of spectral embeddings

Parameters:

Name Type Description Default
embeddings_A array

query spectral embeddings

required
embeddings_B array

reference spectral embeddings

required

Returns:

Name Type Description
similarities_scores list

list of lists with s2v similarity scores

Source code in MS2LDA/Add_On/Spec2Vec/annotation_refined.py
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
def calc_similarity(embeddings_A, embeddings_B):
    """calculates the cosine similarity of spectral embeddings

    ARGS:
        embeddings_A (np.array): query spectral embeddings, one per row
        embeddings_B (np.array or pd.Series): reference spectral embeddings; a Series of vectors is stacked into a 2D array first

    RETURNS:
        similarities_matrix (pd.DataFrame): s2v similarity scores with one row per reference embedding and one column per query embedding (columns are labelled 0 .. len(embeddings_A) - 1)
    """
    # isinstance also accepts Series subclasses, unlike an exact type comparison
    if isinstance(embeddings_B, pd.Series):
        embeddings_B = np.vstack(embeddings_B.to_numpy())

    # One query at a time: each call yields a 1 x n_references matrix, of which row 0 is kept
    similarity_vectors = [
        cosine_similarity_matrix(np.array([embedding_A]), embeddings_B)[0]
        for embedding_A in embeddings_A
    ]

    # Transpose so that references run along the rows and queries along the columns
    similarities_matrix = pd.DataFrame(
        np.array(similarity_vectors).T, columns=range(len(embeddings_A))
    )

    return similarities_matrix

calc_similarity_matrix

calc_similarity_matrix(s2v_similarity, top_n_spectra, masked_spectra)

Calculates a similarity matrix between top hits.

Source code in MS2LDA/Add_On/Spec2Vec/annotation_refined.py
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
def calc_similarity_matrix(s2v_similarity, top_n_spectra, masked_spectra):
    """Calculates a similarity matrix between top hits and masked spectra.

    While the embeddings are computed, the spec2vec warning about spectra
    without known peaks is filtered and the "spec2vec" logger is raised to
    ERROR. Both are restored afterwards, so the caller's warning and logging
    configuration is left as it was.

    ARGS:
        s2v_similarity: object handed to ``calc_embeddings``
        top_n_spectra (list): spectra of the top hits
        masked_spectra (list): masked spectra

    RETURNS:
        masked_similarities: transposed result of ``calc_similarity(top-hit embeddings, masked embeddings)``
    """
    spec2vec_logger = logging.getLogger("spec2vec")
    previous_level = spec2vec_logger.level

    # Suppress the specific warning from spec2vec
    with warnings.catch_warnings():
        warnings.filterwarnings(
            "ignore", message=".*Spectrum without peaks known by the used model.*"
        )

        # Suppress logging warnings from spec2vec for the duration of this call only
        spec2vec_logger.setLevel(logging.ERROR)
        try:
            # Calculate embeddings
            embeddings_top_n_spectra = calc_embeddings(s2v_similarity, top_n_spectra)
            embeddings_masked_spectra = calc_embeddings(s2v_similarity, masked_spectra)

            # Calculate similarity
            masked_similarities = calc_similarity(
                embeddings_top_n_spectra, embeddings_masked_spectra
            )
        finally:
            # Undo the level change even if an embedding or similarity step raises
            spec2vec_logger.setLevel(previous_level)

    return masked_similarities.T

get_mz

get_mz(spectra, frag_err=2, loss_err=2)

extracts fragments and losses from a list of spectra

Parameters:

Name Type Description Default
spectra list

list of matchms spectra objects

required
frag_err int; default = 2

number of decimal places to which fragment m/z values are rounded

2
loss_err int; default = 2

number of decimal places to which loss m/z values are rounded

2

Returns:

Name Type Description
fragments_mz list

list of rounded float numbers for fragments

losses_mz list

list of rounded float numbers for losses

Source code in MS2LDA/Add_On/Spec2Vec/annotation_refined.py
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
def get_mz(spectra, frag_err=2, loss_err=2):
    """collects the rounded fragment and loss mz values of every spectrum

    ARGS:
        spectra (list): list of matchms spectra objects
        frag_err (int; default = 2): number of decimal places used to round fragment mz values
        loss_err (int; default = 2): number of decimal places used to round loss mz values

    RETURNS:
        fragments_mz (list): one set of rounded fragment mz values per spectrum
        losses_mz (list): one set of rounded loss mz values per spectrum
    """
    # Single pass over the spectra; fragments are read before losses for each spectrum
    rounded_per_spectrum = [
        (
            {round(mz, frag_err) for mz in spectrum.peaks.mz},
            {round(mz, loss_err) for mz in spectrum.losses.mz},
        )
        for spectrum in spectra
    ]

    fragments_mz = [fragments for fragments, _ in rounded_per_spectrum]
    losses_mz = [losses for _, losses in rounded_per_spectrum]
    return fragments_mz, losses_mz

hits_intersection

hits_intersection(features)

returns the values that are present in all input lists

Parameters:

Name Type Description Default
features list

list of either losses or fragments

required

Returns:

Name Type Description
common_features list

list of either losses or fragments that are the intersection of the given lists

Source code in MS2LDA/Add_On/Spec2Vec/annotation_refined.py
48
49
50
51
52
53
54
55
56
57
58
def hits_intersection(features):
    """returns values that are present across all input sets

    ARGS:
        features (list): list of sets of either losses or fragments, one set per hit

    RETURNS:
        common_features (set): values contained in every given set; an empty set if no sets are given
    """
    feature_sets = list(features)
    if not feature_sets:
        # set.intersection() needs at least one set; without hits nothing is shared
        return set()

    common_features = set.intersection(*feature_sets)
    return common_features

mask_fragments

mask_fragments(spectrum, mask=1.0)

masks fragments one by one

Parameters:

Name Type Description Default
spectrum

matchms spectrum object

required
mask float

m/z value that replaces each masked fragment

1.0

Returns:

Name Type Description
masked_spectra list

list of matchms spectrum objects

Source code in MS2LDA/Add_On/Spec2Vec/annotation_refined.py
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
def mask_fragments(spectrum, mask=1.0):
    """builds one copy of the spectrum per fragment, with that fragment's mz replaced by the mask

    The masked entry keeps the intensity of the fragment it replaces and is
    placed first, followed by the remaining fragments in their original order.
    Losses are copied unchanged.

    ARGS:
        spectrum: matchms spectrum object
        mask (float): mz value that replaces the masked fragment

    RETURNS:
        masked_spectra (list): list of Mass2Motif objects, one per fragment
    """
    identifier = spectrum.get("id")

    peaks_mz = list(spectrum.peaks.mz)
    peaks_intensities = list(spectrum.peaks.intensities)

    losses_mz = spectrum.losses.mz
    losses_intensities = spectrum.losses.intensities

    masked_spectra = []
    for position in range(len(peaks_mz)):
        # Drop the fragment at `position` and put the mask in front instead
        masked_mz = [mask] + peaks_mz[:position] + peaks_mz[position + 1 :]
        remaining_intensities = (
            peaks_intensities[:position] + peaks_intensities[position + 1 :]
        )
        masked_intensities = [peaks_intensities[position]] + remaining_intensities

        masked_spectra.append(
            Mass2Motif(
                frag_mz=np.array(masked_mz),
                frag_intensities=np.array(masked_intensities),
                loss_mz=np.array(losses_mz),
                loss_intensities=np.array(losses_intensities),
                metadata={"id": identifier, "precursor_mz": None},
            )
        )

    return masked_spectra

mask_losses

mask_losses(spectrum, mask=0.0)

masks losses one by one

Parameters:

Name Type Description Default
spectrum

matchms spectrum object

required
mask float

m/z value that replaces each masked loss

0.0

Returns:

Name Type Description
masked_spectra list

list of matchms spectrum objects

Source code in MS2LDA/Add_On/Spec2Vec/annotation_refined.py
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
def mask_losses(spectrum, mask=0.0):
    """builds one copy of the spectrum per loss, with that loss's mz replaced by the mask

    The masked entry keeps the intensity of the loss it replaces and is placed
    first, followed by the remaining losses in their original order. Fragments
    are copied unchanged.

    ARGS:
        spectrum: matchms spectrum object
        mask (float): mz value that replaces the masked loss

    RETURNS:
        masked_spectra (list): list of Mass2Motif objects, one per loss
    """
    # NOTE (original author): manually connecting mask_losses and mask_fragments
    # kind of failed when not combining (no frags)
    identifier = spectrum.get("id")

    peaks_mz = spectrum.peaks.mz
    peaks_intensities = spectrum.peaks.intensities

    all_losses_mz = list(spectrum.losses.mz)
    all_losses_intensities = list(spectrum.losses.intensities)

    masked_spectra = []
    for position in range(len(all_losses_mz)):
        # Drop the loss at `position` and put the mask in front instead
        masked_mz = [mask] + all_losses_mz[:position] + all_losses_mz[position + 1 :]
        remaining_intensities = (
            all_losses_intensities[:position] + all_losses_intensities[position + 1 :]
        )
        masked_intensities = [all_losses_intensities[position]] + remaining_intensities

        masked_spectra.append(
            Mass2Motif(
                frag_mz=np.array(peaks_mz),
                frag_intensities=np.array(peaks_intensities),
                loss_mz=np.array(masked_mz),
                loss_intensities=np.array(masked_intensities),
                metadata={"id": identifier, "precursor_mz": None},
            )
        )

    return masked_spectra

mask_spectra

mask_spectra(motif_spectra, masks=[-1.0, -1.0])

Masks the fragments and losses for a list of spectra. The first mask value is used for fragments; the second mask value is used for losses.

Parameters:

Name Type Description Default
motif_spectra list

list of matchms spectrum objects

required
masks list

list of float values for fragment and loss mask

[-1.0, -1.0]

Returns:

Name Type Description
masked_motifs_spectra list

list of lists of matchms spectrum objects; every list is for one motif

Source code in MS2LDA/Add_On/Spec2Vec/annotation_refined.py
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
def mask_spectra(motif_spectra, masks=(-1.0, -1.0)):
    """mask the fragments and losses for a list of spectra
    1. mask is for fragments
    2. mask is for losses

    NOTE (original author): fails for spectra that have no fragments.

    ARGS:
        motif_spectra (list): list of matchms spectrum objects
        masks (sequence): two float values, the fragment mask followed by the loss mask; any indexable sequence (list or tuple) works

    RETURNS:
        masked_motifs_spectra (list): list of lists of masked spectrum objects; every list is for one motif and holds the fragment-masked spectra followed by the loss-masked spectra
    """
    # The default is an immutable tuple so it cannot be altered between calls
    return [
        mask_fragments(spectrum, masks[0]) + mask_losses(spectrum, masks[1])
        for spectrum in motif_spectra
    ]

motif_intersection_fragments

motif_intersection_fragments(motif_spectrum, common_fragments, frag_err=2)

retrieves mz values and intensities for fragments that are the intersection between the motif spectrum and the common hits fragments

Parameters:

Name Type Description Default
motif_spectrum

matchms.spectrum.object

required
common_fragments list

list of float values

required
frag_err int, default = 2

number of significant digits to round for fragments

2

Returns:

Name Type Description
opt_motif_fragments_mz list

list of float values representing mz values for an optimized motif

opt_motif_fragments_intensities list

list of float values representing intensity values for an optimized motif

Source code in MS2LDA/Add_On/Spec2Vec/annotation_refined.py
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
def motif_intersection_fragments(motif_spectrum, common_fragments, frag_err=2):
    """picks the motif fragments whose rounded mz occurs in the common hit fragments

    ARGS:
        motif_spectrum: matchms.spectrum.object
        common_fragments (iterable): rounded fragment mz values shared by the hits
        frag_err (int, default = 2): number of decimal places used to round the motif fragment mz values before comparison

    RETURNS:
        opt_motif_fragments_mz (list): unrounded motif mz values of the matching fragments, in the iteration order of common_fragments
        opt_motif_fragments_intensities (list): intensities belonging to those mz values
    """
    rounded_motif_mz = [round(mz, frag_err) for mz in motif_spectrum.peaks.mz]

    opt_motif_fragments_mz = []
    opt_motif_fragments_intensities = []
    for common_mz in common_fragments:
        if common_mz not in rounded_motif_mz:
            continue

        # First motif peak with this rounded mz; its original values are kept
        position = rounded_motif_mz.index(common_mz)
        intensity = motif_spectrum.peaks.intensities[position]
        exact_mz = motif_spectrum.peaks.mz[position]

        opt_motif_fragments_mz.append(exact_mz)
        opt_motif_fragments_intensities.append(intensity)

    return opt_motif_fragments_mz, opt_motif_fragments_intensities

motif_intersection_losses

motif_intersection_losses(motif_spectrum, common_losses, loss_err=2)

retrieves mz values and intensities for losses that are the intersection between the motif spectrum and the common hits losses

Parameters:

Name Type Description Default
motif_spectrum

matchms.spectrum.object

required
common_losses list

list of float values

required
loss_err int, default = 2

number of significant digits to round for losses

2

Returns:

Name Type Description
opt_motif_losses_mz list

list of float values representing mz values for an optimized motif

opt_motif_losses_intensities list

list of float values representing intensity values for an optimized motif

Source code in MS2LDA/Add_On/Spec2Vec/annotation_refined.py
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
def motif_intersection_losses(motif_spectrum, common_losses, loss_err=2):
    """picks the motif losses whose rounded mz occurs in the common hit losses

    ARGS:
        motif_spectrum: matchms.spectrum.object
        common_losses (iterable): rounded loss mz values shared by the hits
        loss_err (int, default = 2): number of decimal places used to round the motif loss mz values before comparison

    RETURNS:
        opt_motif_losses_mz (list): unrounded motif mz values of the matching losses, in the iteration order of common_losses
        opt_motif_losses_intensities (list): intensities belonging to those mz values
    """
    rounded_motif_mz = [round(mz, loss_err) for mz in motif_spectrum.losses.mz]

    opt_motif_losses_mz = []
    opt_motif_losses_intensities = []
    for common_mz in common_losses:
        if common_mz not in rounded_motif_mz:
            continue

        # First motif loss with this rounded mz; its original values are kept
        position = rounded_motif_mz.index(common_mz)
        intensity = motif_spectrum.losses.intensities[position]
        exact_mz = motif_spectrum.losses.mz[position]

        opt_motif_losses_mz.append(exact_mz)
        opt_motif_losses_intensities.append(intensity)

    return opt_motif_losses_mz, opt_motif_losses_intensities

optimize_motif_spectrum

optimize_motif_spectrum(
    motif_spectrum, hit_spectra, smiles_cluster, frag_err=2, loss_err=2
)

runs all scripts from extracting features to overlapping them and creating an optimized motif

Parameters:

Name Type Description Default
motif_spectrum

matchms spectrum object

required
hit_spectra list

list of matchms spectrum objects

required
frag_err int; default = 2

number of significant digits to round for fragments

2
loss_err int; default = 2

number of significant digits to round for losses

2

Returns:

Name Type Description
opt_motif_spectrum

matchms spectrum object

Source code in MS2LDA/Add_On/Spec2Vec/annotation_refined.py
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
def optimize_motif_spectrum(
    motif_spectrum, hit_spectra, smiles_cluster, frag_err=2, loss_err=2
):
    """runs all steps from extracting features to overlapping them and creating an optimized motif

    The optimized motif keeps only those motif fragments and losses whose
    rounded mz occurs in every hit spectrum.

    ARGS:
        motif_spectrum: matchms spectrum object
        hit_spectra (list): list of matchms spectrum objects
        smiles_cluster: value stored under the "Auto_annotation" metadata key of the optimized motif
        frag_err (int; default = 2): number of decimal places used to round fragment mz values of hits and motif
        loss_err (int; default = 2): number of decimal places used to round loss mz values of hits and motif

    RETURNS:
        opt_motif_spectrum: optimized motif spectrum object with charge, ms2accuracy, motifset and id copied from motif_spectrum
    """
    # Hits must be rounded with the same precision as the motif, otherwise
    # non-default frag_err / loss_err values can never match
    fragments_mz, losses_mz = get_mz(hit_spectra, frag_err, loss_err)

    common_fragments = hits_intersection(fragments_mz)
    opt_motif_fragments_mz, opt_motif_fragments_intensities = (
        motif_intersection_fragments(motif_spectrum, common_fragments, frag_err)
    )

    common_losses = hits_intersection(losses_mz)
    opt_motif_losses_mz, opt_motif_losses_intensities = motif_intersection_losses(
        motif_spectrum, common_losses, loss_err
    )

    opt_motif_spectrum = reconstruct_motif_spectrum(
        opt_motif_fragments_mz,
        opt_motif_fragments_intensities,
        opt_motif_losses_mz,
        opt_motif_losses_intensities,
    )
    opt_motif_spectrum.set("Auto_annotation", smiles_cluster)
    opt_motif_spectrum.set("short_annotation", None)
    opt_motif_spectrum.set("charge", motif_spectrum.get("charge"))
    opt_motif_spectrum.set("ms2accuracy", motif_spectrum.get("ms2accuracy"))
    opt_motif_spectrum.set("motifset", motif_spectrum.get("motifset"))
    opt_motif_spectrum.set("annotation", None)
    opt_motif_spectrum.set("id", motif_spectrum.get("id"))

    return opt_motif_spectrum

reconstruct_motif_spectrum

reconstruct_motif_spectrum(
    opt_motif_fragments_mz,
    opt_motif_fragments_intensities,
    opt_motif_losses_mz,
    opt_motif_losses_intensities,
)

creates a matchms spectrum object based on the optimized features

Parameters:

Name Type Description Default
opt_motif_fragments_mz list

list of float values representing mz values for an optimized motif (fragments)

required
opt_motif_fragments_intensities list

list of float values representing intensity values for an optimized motif (fragments)

required
opt_motif_losses_mz list

list of float values representing mz values for an optimized motif (losses)

required
opt_motif_losses_intensities list

list of float values representing intensity values for an optimized motif (losses)

required

Returns:

Name Type Description
opt_motif_spectrum

matchms spectrum object

Source code in MS2LDA/Add_On/Spec2Vec/annotation_refined.py
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
def _sorted_feature_arrays(mz_values, intensities):
    """Return mz values and intensities as arrays sorted by mz, or two empty arrays if they are empty or differ in length."""
    if len(mz_values) == 0 or len(mz_values) != len(intensities):
        return np.array([]), np.array([])

    sorted_mz, sorted_intensities = zip(*sorted(zip(mz_values, intensities)))
    return np.array(sorted_mz), np.array(sorted_intensities)


def reconstruct_motif_spectrum(
    opt_motif_fragments_mz,
    opt_motif_fragments_intensities,
    opt_motif_losses_mz,
    opt_motif_losses_intensities,
):
    """creates a motif spectrum object based on the optimized features

    Fragments and losses are handled independently: each pair of mz values
    and intensities is sorted by mz, and a pair that is empty or has
    mismatching lengths is replaced by empty arrays. Unusable fragments
    therefore no longer discard the losses.

    ARGS:
        opt_motif_fragments_mz (list): list of float values representing mz values for an optimized motif (fragments)
        opt_motif_fragments_intensities (list): list of float values representing intensity values for an optimized motif (fragments)
        opt_motif_losses_mz (list): list of float values representing mz values for an optimized motif (losses)
        opt_motif_losses_intensities (list): list of float values representing intensity values for an optimized motif (losses)

    RETURNS:
        opt_motif_spectrum: Mass2Motif object built from the sorted features
    """
    fragments_mz, fragments_intensities = _sorted_feature_arrays(
        opt_motif_fragments_mz, opt_motif_fragments_intensities
    )
    # The original author once observed a loss mz (1.003) without an intensity;
    # such mismatching pairs are dropped as a whole
    losses_mz, losses_intensities = _sorted_feature_arrays(
        opt_motif_losses_mz, opt_motif_losses_intensities
    )

    opt_motif_spectrum = Mass2Motif(
        frag_mz=fragments_mz,
        frag_intensities=fragments_intensities,
        loss_mz=losses_mz,
        loss_intensities=losses_intensities,
    )

    return opt_motif_spectrum