Skip to content

MassQL Module

MassQL4MotifDB

motifDB2motifs

motifDB2motifs(motifDB_ms2, filter_table=pd.DataFrame())

converts a (filtered) MotifDB to motif spectra objects

Parameters:

Name Type Description Default
motifDB_ms2 dataframe

MassQL dataframe format for MS2 data

required
filter_table dataframe

output from a massql query; an empty dataframe (the default) converts all motifs

pd.DataFrame()

RETURNS (list): list of matchms spectra objects

Source code in MS2LDA/Add_On/MassQL/MassQL4MotifDB.py
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
def motifDB2motifs(motifDB_ms2, filter_table=pd.DataFrame()):
    """converts a (filtered) MotifDB to motif spectra objects

    ARGS:
        motifDB_ms2 (pd.dataframe): MassQL dataframe format for MS2 data
        filter_table (pd.dataframe or None): output from massql query; rows of motifDB_ms2
            are kept when their "scan" value occurs in filter_table["scan"].
            None or an empty dataframe converts all motifs

    RETURNS (list): list of Mass2Motif objects, one per row of the grouped MS2 dataframe
    """

    # metadata columns copied unchanged from the grouped dataframe into the motif metadata
    metadata_fields = (
        "charge",
        "short_annotation",
        "annotation",
        "ms2accuracy",
        "motifset",
        "motif_id",
        "analysis_massspectrometer",
        "collision_energy",
        "other_information",
        "scientific_name",
        "sample_type",
        "massive_id",
        "taxon_id",
        "analysis_ionizationsource",
        "analysis_chromatographyandphase",
        "analysis_polarity",
        "paper_url",
        "auto_annotation",
        "property",
    )

    def drop_nan(values):
        """returns values as numpy array without NaN entries"""
        values = np.array(values)
        return values[~np.isnan(values)]

    # None is documented as "convert all", so treat it like an empty filter
    if filter_table is None or filter_table.empty:
        filtered_motifDB = motifDB_ms2
    else:
        filtered_motifs = filter_table["scan"].to_list()
        filtered_motifDB = motifDB_ms2[motifDB_ms2["scan"].isin(filtered_motifs)]

    ms2_df_grouped = group_ms2(ms2_df=filtered_motifDB)

    motif_spectra = []
    for motif in ms2_df_grouped.itertuples():
        # "id" mirrors motif_id; the remaining keys are taken 1:1 from the row
        metadata = {"id": motif.motif_id}
        for field in metadata_fields:
            metadata[field] = getattr(motif, field)

        motif_spectrum = Mass2Motif(
            frag_mz=drop_nan(motif.frag_mz),
            frag_intensities=drop_nan(motif.frag_intens),
            loss_mz=drop_nan(motif.loss_mz),
            loss_intensities=drop_nan(motif.loss_intens),
            metadata=metadata,
        )

        motif_spectra.append(motif_spectrum)

    return motif_spectra

motifs2motifDB

motifs2motifDB(spectra)

converts a set of motif spectra into a MassQL dataframe format

Parameters:

Name Type Description Default
spectra list

list of matchms spectra objects

required

Returns:

Name Type Description
ms1_df dataframe

dataframe with ms1 information (not used, but important for massql algo)

ms2_df dataframe

dataframe with ms2 (frag and loss) information

Source code in MS2LDA/Add_On/MassQL/MassQL4MotifDB.py
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
def motifs2motifDB(spectra):
    """converts a set of motif spectra into a MassQL dataframe format

    Every fragment and every loss of a spectrum becomes one row. Fragment rows carry
    NaN in the loss columns and loss rows carry NaN in the fragment columns. All rows
    of one spectrum share the same randomly drawn "scan" id.

    ARGS:
        spectra (list): list of matchms spectra objects

    RETURNS:
        ms1_df (pd.dataframe): dataframe with ms1 information (not used, but important for massql algo);
            holds a copy of the last ms2 row, or is empty if no fragment or loss was found
        ms2_df (pd.dataframe): dataframe with ms2 (frag and loss) information
    """

    # dataframe columns filled from the spectrum metadata, in output column order
    metadata_columns = (
        "charge",
        "ms2accuracy",
        "short_annotation",
        "annotation",
        "auto_annotation",
        "motif_id",
        "motifset",
        "analysis_massspectrometer",
        "collision_energy",
        "other_information",
        "scientific_name",
        "sample_type",
        "massive_id",
        "taxon_id",
        "analysis_ionizationsource",
        "analysis_chromatographyandphase",
        "analysis_polarity",
        "paper_url",
        "property",
    )
    # columns whose name differs from the metadata key they are read from
    renamed_keys = {"motif_id": "id"}

    def collect_default_info(spectrum, hash_id):
        """collects information about the motif like charge and its annotation as well as a hashed motif_id

        ARGS:
            spectrum: matchms spectrum object for motif
            hash_id (int): random id shared by all rows of this spectrum (stored as "scan")

        RETURNS:
            default_info (dict): metadata columns plus "scan" and "ms1scan"
        """

        default_info = {
            column: spectrum.get(renamed_keys.get(column, column))
            for column in metadata_columns
        }
        default_info["scan"] = hash_id
        default_info["ms1scan"] = 0

        return default_info  # here jsonschema would be nice

    # jsonschema for submitting to MotifDB
    # add more columns: instrument, dda or dia, author, publication?
    # how to push to a public motifDB, pull request on github?

    ms2mz_list = []
    for spectrum in spectra:
        hash_id = random.getrandbits(128)
        # the metadata is identical for every peak of a spectrum, so look it up once
        default_info = collect_default_info(spectrum, hash_id)

        if spectrum.peaks:
            for frag_mz, frag_intens in zip(
                spectrum.peaks.mz, spectrum.peaks.intensities
            ):
                ms2mz_list.append(
                    {
                        "frag_mz": frag_mz,
                        "frag_intens": frag_intens,
                        "loss_mz": np.nan,
                        "loss_intens": np.nan,
                        **default_info,
                    }
                )

        if spectrum.losses:
            for loss_mz, loss_intens in zip(
                spectrum.losses.mz, spectrum.losses.intensities
            ):
                ms2mz_list.append(
                    {
                        "frag_mz": np.nan,
                        "frag_intens": np.nan,
                        "loss_mz": loss_mz,
                        "loss_intens": loss_intens,
                        **default_info,
                    }
                )

    # slicing instead of indexing keeps this safe when no row was produced at all
    ms1_df = pd.DataFrame(ms2mz_list[-1:])
    ms2_df = pd.DataFrame(ms2mz_list)

    return ms1_df, ms2_df

MassQL4Motifs Integration

massql4motifs