Analyses class and methods for handling Mass Spectrometry data
Source:R/class_Analyses_MassSpecAnalyses.R
MassSpecAnalyses.RdThe MassSpecAnalyses class represents mass spectrometry (MS) raw data files and holds results from processing MS data. It is a subclass of the Analyses class and provides methods to manage and inspect MS data. The MassSpecAnalyses class is built from a character vector of file paths to MS raw data files. The possible file formats are mzML and mzXML. If msconvert from ProteoWizard is installed and found via CLI (i.e., must be added to the environmental variables), the engine can also load vendor formats (i.e., Agilent MassHunter .d, Thermo Scientific RAW, Shimadzu LCD (except ITOF), Sciex WIFF/WIFF2) by direct conversion to mzML. Note that conversion of vendor formats is only possible under Windows OS.
Usage
MassSpecAnalyses(files = NULL, centroid = FALSE, levels = c(1, 2))
# S3 method for class 'MassSpecAnalyses'
validate_object(x)
# S3 method for class 'MassSpecAnalyses'
get_analysis_names(x)
# S3 method for class 'MassSpecAnalyses'
get_replicate_names(x)
# S3 method for class 'MassSpecAnalyses'
set_replicate_names(x, value)
# S3 method for class 'MassSpecAnalyses'
get_blank_names(x)
# S3 method for class 'MassSpecAnalyses'
set_blank_names(x, value)
# S3 method for class 'MassSpecAnalyses'
get_concentrations(x)
# S3 method for class 'MassSpecAnalyses'
set_concentrations(x, value)
# S3 method for class 'MassSpecAnalyses'
info(x)
# S3 method for class 'MassSpecAnalyses'
add(x, value)
# S3 method for class 'MassSpecAnalyses'
remove(x, value)
# S3 method for class 'MassSpecAnalyses'
x[i]
# S3 method for class 'MassSpecAnalyses'
x[i] <- value
# S3 method for class 'MassSpecAnalyses'
x[[i]]
# S3 method for class 'MassSpecAnalyses'
x[[i]] <- value
# S3 method for class 'MassSpecAnalyses'
save(x, file)
as.MassSpecAnalyses(value)
# S3 method for class 'MassSpecAnalyses'
read(x, file)
# S3 method for class 'MassSpecAnalyses'
get_spectra_tic(
x,
analyses = NULL,
levels = c(1, 2),
rt = NULL,
as_list = FALSE
)
# S3 method for class 'MassSpecAnalyses'
get_spectra_bpc(
x,
analyses = NULL,
levels = c(1, 2),
rt = NULL,
as_list = FALSE
)
# S3 method for class 'MassSpecAnalyses'
get_raw_spectra(
x,
analyses = NULL,
levels = NULL,
mass = NULL,
mz = NULL,
rt = NULL,
mobility = NULL,
ppm = 20,
sec = 60,
millisec = 5,
id = NULL,
allTraces = TRUE,
isolationWindow = 1.3,
minIntensityMS1 = 0,
minIntensityMS2 = 0
)
# S3 method for class 'MassSpecAnalyses'
get_spectra_eic(
x,
analyses = NULL,
mass = NULL,
mz = NULL,
rt = NULL,
mobility = NULL,
ppm = 20,
sec = 60,
millisec = 5,
id = NULL
)
# S3 method for class 'MassSpecAnalyses'
get_spectra_ms1(
x,
analyses = NULL,
mass = NULL,
mz = NULL,
rt = NULL,
mobility = NULL,
ppm = 20,
sec = 60,
millisec = 5,
id = NULL,
mzClust = 0.003,
presence = 0.8,
minIntensity = 1000
)
# S3 method for class 'MassSpecAnalyses'
get_spectra_ms2(
x,
analyses = NULL,
mass = NULL,
mz = NULL,
rt = NULL,
mobility = NULL,
ppm = 20,
sec = 60,
millisec = 5,
id = NULL,
isolationWindow = 1.3,
mzClust = 0.005,
presence = 0.8,
minIntensity = 0
)
# S3 method for class 'MassSpecAnalyses'
plot_spectra_tic(
x,
analyses = NULL,
levels = c(1, 2),
rt = NULL,
xLab = NULL,
yLab = NULL,
title = NULL,
colorBy = "analyses",
legendNames = NULL,
downsize = 1,
interactive = TRUE,
renderEngine = "webgl"
)
# S3 method for class 'MassSpecAnalyses'
plot_spectra_bpc(
x,
analyses = NULL,
levels = c(1, 2),
rt = NULL,
xLab = NULL,
yLab = NULL,
title = NULL,
colorBy = "analyses",
legendNames = NULL,
interactive = TRUE,
renderEngine = "webgl"
)
# S3 method for class 'MassSpecAnalyses'
plot_spectra_eic(
x,
analyses = NULL,
mass = NULL,
mz = NULL,
rt = NULL,
mobility = NULL,
ppm = 20,
sec = 60,
millisec = 5,
id = NULL,
legendNames = NULL,
xLab = NULL,
yLab = NULL,
title = NULL,
colorBy = "targets",
interactive = TRUE,
renderEngine = "webgl"
)
# S3 method for class 'MassSpecAnalyses'
plot_spectra_xic(
x,
analyses = NULL,
mass = NULL,
mz = NULL,
rt = NULL,
mobility = NULL,
ppm = 20,
sec = 60,
millisec = 5,
id = NULL,
legendNames = NULL,
plotTargetMark = TRUE,
targetsMark = NULL,
ppmMark = 5,
secMark = 10,
numberRows = 1,
renderEngine = "webgl"
)
# S3 method for class 'MassSpecAnalyses'
plot_spectra_ms1(
x,
analyses = NULL,
mass = NULL,
mz = NULL,
rt = NULL,
mobility = NULL,
ppm = 20,
sec = 60,
millisec = 5,
id = NULL,
mzClust = 0.003,
presence = 0.8,
minIntensity = 1000,
legendNames = NULL,
xLab = NULL,
yLab = NULL,
title = NULL,
colorBy = "targets",
showText = FALSE,
interactive = TRUE
)
# S3 method for class 'MassSpecAnalyses'
plot_spectra_ms2(
x,
analyses = NULL,
mass = NULL,
mz = NULL,
rt = NULL,
mobility = NULL,
ppm = 20,
sec = 60,
millisec = 5,
id = NULL,
isolationWindow = 1.3,
mzClust = 0.005,
presence = 0.8,
minIntensity = 0,
legendNames = NULL,
xLab = NULL,
yLab = NULL,
title = NULL,
colorBy = "targets",
showText = TRUE,
interactive = TRUE
)
# S3 method for class 'MassSpecAnalyses'
get_raw_chromatograms(
x,
analyses = NULL,
chromatograms = NULL,
rtmin = 0,
rtmax = 0,
minIntensity = NULL
)
# S3 method for class 'MassSpecAnalyses'
load_spectra(
x,
analyses = NULL,
levels = NULL,
mass = NULL,
mz = NULL,
rt = NULL,
mobility = NULL,
ppm = 20,
sec = 60,
millisec = 5,
id = NULL,
allTraces = TRUE,
isolationWindow = 1.3,
minIntensityMS1 = 0,
minIntensityMS2 = 0
)
# S3 method for class 'MassSpecAnalyses'
load_chromatograms(
x,
analyses = NULL,
chromatograms = NULL,
rtmin = 0,
rtmax = 0,
minIntensity = NULL
)
# S3 method for class 'MassSpecAnalyses'
get_matrix_suppression(x, rtWindowVal = 10)
# S3 method for class 'MassSpecAnalyses'
plot_matrix_suppression(
x,
analyses = NULL,
rtWindowVal = 10,
xLab = NULL,
yLab = NULL,
title = NULL,
colorBy = "analyses",
legendNames = NULL,
downsize = 1,
interactive = TRUE,
showLegend = TRUE,
renderEngine = "webgl"
)Arguments
- files
A character vector with full file path/s of mzML or mzXML file/s or a data.frame with the columns file, replicate and blank with the full file path of mzML or mzXML file/s (character), the replicate group name (character) and the associated blank replicate group name (character). Note the file paths must have the extension .mzML or .mzXML.
- centroid
Logical (length 1). Set to
TRUEto centroid data when converting from vendor formats to mzML.- levels
Integer vector with the levels of MS data.
- x
A
MassSpecAnalysesobject.- value
An object that highly depends on the method. Check the specific method documentation for details.
- i
A character or numeric vector to perform subsetting. Check the specific method for details.
- file
A string with a file path. Check the specific method or class for details on the file format.
- analyses
Character or numeric vector with names or indexes of analyses in the
Analysesobject.- rt
A vector with target retention time values (in seconds) or a two columns data.table or data.frame with minimum and maximum retention time values (in seconds).
- as_list
Logical (length 1). If
TRUE, the result is returned as a list. Default isFALSE, returning adata.table.- mass
A vector with target neutral mass value/s (in Da) or a two columns data.table or data.frame named
minandmaxwith minimum and maximum neutral mass values (in Da), respectively. Alternatively, neutral mass (in Da) and retention time (in seconds) and/or drift time values (in milliseconds) can be given as one data.table or data.frame with columns namedmassandrtand/ordrift. Then, the deviations given in theppm,secandmillisecarguments are used to calculate the ranges. Also works with a data.table or data.frame with minimum and maximum values of neutral mass, retention time and drift time targets. In this case, the column names must bemin,max,rtmin,rtmax,driftminanddriftmax. Note that when mass/time ranges are given, theppm,secandmillisecarguments are not used.- mz
A vector with target m/z value/s (in Da) or a two columns data.table or data.frame named
mzminandmzmaxwith minimum and maximum m/z values (in Da), respectively. Alternatively, m/z (in Da) and retention time values (in seconds) can be given as one data.table or data.frame with columns namedmzandrtand/ordrift. Then, the deviations given in theppm,secandmillisecarguments are used to calculate the ranges. Also works with a data.table or data.frame with minimum and maximum values of m/z, retention time and drift time targets. In this case, the column names must bemzmin,mzmax,rtmin,rtmax,driftminanddriftmax. Note that when mass/time ranges are given, theppm,secandmillisecarguments are not used.- mobility
A vector with target drift time values (in milliseconds) or a two columns data.table or data.frame with minimum and maximum drift time values (in milliseconds).
- ppm
Numeric of length one with the mass deviation, in ppm.
- sec
Numeric of length one with the time deviation, in seconds.
- millisec
Numeric of length one with the drift time deviation, in milliseconds.
- id
Character with the same length as m/z and retention time targets to be used as identifier/s. When not given, the id is built as a combination of the m/z and retention time ranges or values.
- allTraces
Logical, when
TRUEall level 2 data is returned. WhenFALSEand level has 2, only the MS2 traces of MS1 targets are returned, using theisolationWindowto calculate the mass window.- isolationWindow
Numeric value with the isolation window (in Da) applied for ion isolation and fragmentation during acquisition of tandem data (i.e., MS2 data).
- minIntensityMS1
Numeric value with the minimum intensity of level 1 data (i.e., MS1 data).
- minIntensityMS2
Numeric value with the minimum intensity of level 2 data (i.e., MS2 data).
- mzClust
Numeric (length 1) with the mass deviation threshold (in Da) to cluster mass traces. Highly dependent on the mass resolution of the MS data.
- presence
Numeric (length 1) with the required presence ratio from 0 (i.e., doesn't need to be present in any spectra) to 1 (i.e., must be present in all spectra) for traces during clustering of spectra.
- minIntensity
Numeric (length 1) with the minimum intensity.
- xLab
A string with the title for the x axis.
- yLab
A string with the title for the y axis.
- title
A string with the title.
- colorBy
Character (length 1). One of
analyses(the default),polarities,levels,targetsorreplicates. Mixed use (e.g.analyses+polarities) is also possible.- legendNames
A character vector with the same length as the targets or
TRUEorFALSEfor using the name in the added targets as legend of the plot.- downsize
An integer of length one to downsize the TIC plot. The default is 1.
- interactive
Logical (length 1). When
TRUE, the data is plotted interactively using plotly.- renderEngine
The engine to render the data. The default is "webgl".
- plotTargetMark
Logical (length 1), set to
TRUEto plot a target mark.- targetsMark
A data.frame with columns
mzandrt, defining the m/z and retention time values of each target. Note that the number of rows should match with the number of targets.- ppmMark
A numeric vector of length one to define the mass deviation, in ppm, of the target mark. The default is 5 ppm.
- secMark
A numeric vector of length one to define the time deviation, in seconds, of the target mark. The default is 10 ppm.
- numberRows
An integer vector of length one to define the number of rows to grid the plots. Note that each target is always plotted in one row for all selected analyses.
- showText
Logical (length 1), set to
TRUEto show the text annotations.- chromatograms
A character or integer vector with the ID (i.e. name) or the index of the chromatograms.
- rtmin
Numeric (length 1) with the minimum retention time.
- rtmax
Numeric (length 1) with the maximum retention time.
- rtWindowVal
Numeric (length 1) with the retention time window value (in seconds).
- showLegend
Logical (length 1). Set to
TRUEto show legend.
Value
An object of class MassSpecAnalyses as a list of two elements: analyses and results. Each element in analyses is a list with the following elements:
name: The name of the analysis.replicate: The name of the replicate for the analysis.blank: The name of the blank for the analysis.file: The file path of the analysis.format: The file format of the analysis.type: The type of the analysis (e.g., "MS", MS/MS).spectra_number: The number of spectra in the analysis.spectra_headers: Adata.tablewith the headers of the spectra in the analysis.chromatograms_number: The number of chromatograms in the analysis.chromatograms_headers: Adata.tablewith the headers of the chromatograms in the analysis.metadata: A list with metadata information for the analysis.concentration: The concentration for the analysis. Theresultselement is a list of results, where each element is a specific Results child class. Currently, theMassSpecAnalysesclass supports the following results: MassSpecResults_NonTargetAnalysis, MassSpecSpectra, and Chromatograms.
Methods (by generic)
validate_object(MassSpecAnalyses): Validate the MassSpecAnalyses object, returningNULLif valid or an error if not.get_analysis_names(MassSpecAnalyses): Get the names of the analyses in theMassSpecAnalysesobject.get_replicate_names(MassSpecAnalyses): Get the replicates of the analyses in theMassSpecAnalysesobject.set_replicate_names(MassSpecAnalyses): Set the replicates of the analyses in theMassSpecAnalysesobject. The argumentvaluemust be a character vector with the same length as the number of analyses in the object.get_blank_names(MassSpecAnalyses): Get the blanks of the analyses in theMassSpecAnalysesobject.set_blank_names(MassSpecAnalyses): Set the blanks of the analyses in theMassSpecAnalysesobject. The argumentvaluemust be a character vector with the same length as the number of analyses in the object.get_concentrations(MassSpecAnalyses): Get the concentrations of the analyses in theMassSpecAnalysesobject.set_concentrations(MassSpecAnalyses): Set the concentrations of the analyses in theMassSpecAnalysesobject. The argumentvaluemust be a numeric vector with the same length as the number of analyses in the object.info(MassSpecAnalyses): Get a summary of theMassSpecAnalysesobject.add(MassSpecAnalyses): Add analyses to theMassSpecAnalysesobject. The argumentvaluecan be a character vector with file paths to MS raw data files or a list ofMassSpecAnalysisobjects. If the files are valid, they will be converted toMassSpecAnalysisobjects.remove(MassSpecAnalyses): Remove analyses from theMassSpecAnalysesobject. The argumentvaluecan be a character vector with the names of the analyses to remove or a numeric vector with the indices of the analyses to remove. The method will also remove the corresponding results fromMassSpecResults_NonTargetAnalysis,Spectra, andChromatogramsif available.[: Subset theMassSpecAnalysesobject by indices, including the result elements:MassSpecResults_NonTargetAnalysis,Spectra, andChromatograms. The argumentican be a numeric vector with the indices of the analyses to keep.`[`(MassSpecAnalyses) <- value: Add analyses to theMassSpecAnalysesobject by indices. The argumentican be a numeric vector with the indices of the analyses to keep, andvaluecan be a character vector with file paths to MS raw data files or a list ofMassSpecAnalysisobjects.[[: Subset theMassSpecAnalysesobject by indices, including the result elements:MassSpecResults_NonTargetAnalysis,Spectra, andChromatograms. The argumentican be a numeric value with the index of the analysis to keep. The method returns aMassSpecAnalysesobject with only the specified analysis.`[[`(MassSpecAnalyses) <- value: Add analyses to theMassSpecAnalysesobject by indices. The argumentican be a numeric value with the index of the analysis to modify, andvaluecan be a character vector with file path to an MS raw data file.save(MassSpecAnalyses): Save aMassSpecAnalysesobject to a file. Thefilecan be in JSON or RDS format.read(MassSpecAnalyses): Read aMassSpecAnalysesobject from a file. Thefilecan be in JSON or RDS format.get_spectra_tic(MassSpecAnalyses): Get the total ion current (TIC) spectra for the specified analyses.get_spectra_bpc(MassSpecAnalyses): Get the base peak chromatograms (BPC) spectra for the specified analyses.get_raw_spectra(MassSpecAnalyses): Get raw spectra data from specified analyses, returning adata.tablewith the spectra data.get_spectra_eic(MassSpecAnalyses): Get extracted ion chromatograms (EIC) for the specified analyses and targets.get_spectra_ms1(MassSpecAnalyses): Get MS1 spectra for the specified analyses and targets.get_spectra_ms2(MassSpecAnalyses): Get MS2 spectra for the specified analyses and targets.plot_spectra_tic(MassSpecAnalyses): Plot total ion current (TIC) spectra for the specified analyses.plot_spectra_bpc(MassSpecAnalyses): Plot base peak chromatograms (BPC) spectra for the specified analyses.plot_spectra_eic(MassSpecAnalyses): Plot extracted ion chromatograms (EIC) for the specified analyses and targets.plot_spectra_xic(MassSpecAnalyses): Plot spectra extract ion chromatograms (EIC) and m/z vs retention time from the analyses.plot_spectra_ms1(MassSpecAnalyses): Plot MS1 spectra for the specified analyses and targets.plot_spectra_ms2(MassSpecAnalyses): Plot MS2 spectra for the specified analyses and targets.get_raw_chromatograms(MassSpecAnalyses): Get raw chromatograms from the analyses.load_spectra(MassSpecAnalyses): Load spectra from the analyses, adding them to the results as aMassSpecSpectraobject.load_chromatograms(MassSpecAnalyses): Load chromatograms from the analyses, adding them to the results as aChromatogramsobject.get_matrix_suppression(MassSpecAnalyses): Get TIC matrix suppression for the analyses using the blank replicate analyses as reference for the suppression factor. ThertWindowValargument defines the retention time window in seconds for the suppression factor calculation. The calculation is based on the work from Tisler et al. (2021).plot_matrix_suppression(MassSpecAnalyses): Plot TIC matrix suppression for the specified analyses. ThertWindowValargument defines the retention time window in seconds for the suppression factor calculation. The calculation is based on the work from Tisler et al. (2021).
Functions
as.MassSpecAnalyses(): Convert a valid list to aMassSpecAnalysesobject. This method is used internally and should not be called directly.
References
Kapoulkine A (2022). “pugixml 1.13: Light-weight, simple and fast XML parser for C++ with XPath support.” Copyright (C) 2006-2018. http://pugixml.org.
Kessner D, Chambers M, Burke R, Agus D, Mallick P (2008). “ProteoWizard: open source software for rapid proteomics tools development.” Bioinformatics, 24(21), 2534–2536. doi:10.1093/bioinformatics/btn323 , http://www.ncbi.nlm.nih.gov/pubmed/18606607.
Chambers MC, Maclean B, Burke R, Amodei D, Ruderman DL, Neumann S, Gatto L, Fischer B, Pratt B, Egertson J, Hoff K, Kessner D, Tasman N, Shulman N, Frewen B, Baker TA, Brusniak M, Paulse C, Creasy D, Flashner L, Kani K, Moulding C, Seymour SL, Nuwaysir LM, Lefebvre B, Kuhlmann F, Roark J, Rainer P, Detlev S, Hemenway T, Huhmer A, Langridge J, Connolly B, Chadick T, Holly K, Eckels J, Deutsch EW, Moritz RL, Katz JE, Agus DB, MacCoss M, Tabb DL, Mallick P (2012). “A cross-platform toolkit for mass spectrometry and proteomics.” Nature Biotechnology, 30(10), 918–920. https://doi.org/10.1038/nbt.2377.
Tisler S, Pattison DI, Christensen JH (2021). “Correction of Matrix Effects for Reliable Non-target Screening LC–ESI–MS Analysis of Wastewater.” Analytical Chemistry, 93(24), 8432-8441. doi:10.1021/acs.analchem.1c00357 , PMID: 34096716, https://doi.org/10.1021/acs.analchem.1c00357.
Tisler S, Pattison DI, Christensen JH (2021). “Correction of Matrix Effects for Reliable Non-target Screening LC–ESI–MS Analysis of Wastewater.” Analytical Chemistry, 93(24), 8432-8441. doi:10.1021/acs.analchem.1c00357 , PMID: 34096716, https://doi.org/10.1021/acs.analchem.1c00357.