libpappsomspp
Library for mass spectrometry
pwizmsrunreader.cpp
Go to the documentation of this file.
1/**
2 * \file pappsomspp/msrun/private/pwizmsrunreader.cpp
3 * \date 29/05/2018
4 * \author Olivier Langella
5 * \brief MSrun file reader based on the proteowizard library
6 */
7
8/*******************************************************************************
9 * Copyright (c) 2018 Olivier Langella <Olivier.Langella@u-psud.fr>.
10 *
11 * This file is part of the PAPPSOms++ library.
12 *
13 * PAPPSOms++ is free software: you can redistribute it and/or modify
14 * it under the terms of the GNU General Public License as published by
15 * the Free Software Foundation, either version 3 of the License, or
16 * (at your option) any later version.
17 *
18 * PAPPSOms++ is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 * GNU General Public License for more details.
22 *
23 * You should have received a copy of the GNU General Public License
24 * along with PAPPSOms++. If not, see <http://www.gnu.org/licenses/>.
25 *
26 * Contributors:
27 * Olivier Langella <Olivier.Langella@u-psud.fr> - initial API and
28 *implementation
29 ******************************************************************************/
30
31
32#include <QDebug>
33
34#include "pwizmsrunreader.h"
35
36#include <pwiz/data/msdata/DefaultReaderList.hpp>
37
38
39#include "../../utils.h"
40#include "../../pappsoexception.h"
41#include "../../exception/exceptionnotfound.h"
42#include "../../exception/exceptionnotpossible.h"
43
44
45// int pwizMsRunReaderMetaTypeId =
46// qRegisterMetaType<pappso::PwizMsRunReader>("pappso::PwizMsRunReader");
47
48
49namespace pappso
50{
51
52
54 : MsRunReader(msrun_id_csp)
55{
56 // The initialization needs to be done immediately so that we get the pwiz
57 // MsDataPtr corresponding to the right ms_run_id in the parameter. That
58 // pointer will be set to msp_msData.
59
60 initialize();
61}
62
63
64void
66{
67 std::string file_name_std =
69
70 // Make a backup of the current locale
71 std::string env_backup = setlocale(LC_ALL, "");
72 // struct lconv *lc = localeconv();
73
74 // qDebug() << __FILE__ << "@" << __LINE__ << __FUNCTION__ << "()"
75 //<< "env_backup=" << env_backup.c_str() << "lc->decimal_point"
76 //<< lc->decimal_point;
77
78 // Now actually search the useful MSDataPtr to the member variable.
79
80 pwiz::msdata::DefaultReaderList defaultReaderList;
81
82 std::vector<pwiz::msdata::MSDataPtr> msDataPtrVector;
83
84 try
85 {
86 defaultReaderList.read(file_name_std, msDataPtrVector);
87 }
88 catch(std::exception &error)
89 {
90 qDebug() << QString("Failed to read the data from file %1")
91 .arg(QString::fromStdString(file_name_std));
92
93 throw(PappsoException(
94 QString("Error reading file %1 in PwizMsRunReader, for msrun %2:\n%3")
95 .arg(mcsp_msRunId->getFileName())
96 .arg(mcsp_msRunId.get()->toString())
97 .arg(error.what())));
98 }
99
100 // qDebug() << __FILE__ << "@" << __LINE__ << __FUNCTION__ << "()"
101 //<< "The number of runs is:" << msDataPtrVector.size()
102 //<< "The number of spectra in first run is:"
103 //<< msDataPtrVector.at(0)->run.spectrumListPtr->size();
104
105 // Single-run file handling here.
106
107 // Specific case of the MGF data format: we do not have a run id for that kind
108 // of data. In this case there must be a single run!
109
110 if(mcsp_msRunId->getRunId().isEmpty())
111 {
112 if(msDataPtrVector.size() != 1)
113 throw(
114 ExceptionNotPossible("For the kind of file at hand there can only be "
115 "one run in the file."));
116
117 // At this point we know the single msDataPtr is the one we are looking
118 // for.
119
120 msp_msData = msDataPtrVector.front();
121 }
122 else
123 {
124 // Multi-run file handling here.
125 for(auto &msDataPtr : msDataPtrVector)
126 {
127 if(msDataPtr->run.id == mcsp_msRunId->getRunId().toStdString())
128 {
129 msp_msData = msDataPtr;
130
131 // qDebug() << __FILE__ << "@" << __LINE__ << __FUNCTION__ << "()"
132 //<< "Found the right MSDataPtr for run id.";
133
134 break;
135 }
136 }
137 }
138
139 if(msp_msData == nullptr)
140 {
142 QString("Could not find a MSDataPtr matching the requested run id : %1")
143 .arg(mcsp_msRunId.get()->toString())));
144 }
145
146
147 // check if this MS run can be used with scan numbers
148 // MS:1000490 Agilent instrument model
149 pwiz::cv::CVID native_id_format =
150 pwiz::msdata::id::getDefaultNativeIDFormat(*msp_msData.get());
151
152 // msp_msData.get()->getDefaultNativeIDFormat();
153
154 if(native_id_format == pwiz::cv::CVID::MS_Thermo_nativeID_format)
155 {
156 m_hasScanNumbers = true;
157 }
158 else
159 {
160 m_hasScanNumbers = false;
161 }
162
163 if(mcsp_msRunId.get()->getMzFormat() == MzFormat::mzXML)
164 {
165 m_hasScanNumbers = true;
166 }
167}
168
169
171{
172}
173
174
175pwiz::msdata::SpectrumPtr
176PwizMsRunReader::getPwizSpectrumPtr(pwiz::msdata::SpectrumList *p_spectrum_list,
177 std::size_t spectrum_index,
178 bool want_binary_data) const
179{
180 pwiz::msdata::SpectrumPtr native_pwiz_spectrum_sp;
181
182 try
183 {
184 native_pwiz_spectrum_sp =
185 p_spectrum_list->spectrum(spectrum_index, want_binary_data);
186 }
187 catch(std::runtime_error &error)
188 {
189 qDebug() << "getPwizSpectrumPtr error " << error.what() << " "
190 << typeid(error).name();
191
192 throw ExceptionNotFound(QObject::tr("Pwiz spectrum index %1 not found in "
193 "MS file std::runtime_error :\n%2")
194 .arg(spectrum_index)
195 .arg(error.what()));
196 }
197 catch(std::exception &error)
198 {
199 qDebug() << "getPwizSpectrumPtr error " << error.what()
200 << typeid(error).name();
201
202 throw ExceptionNotFound(
203 QObject::tr("Pwiz spectrum index %1 not found in MS file :\n%2")
204 .arg(spectrum_index)
205 .arg(error.what()));
206 }
207
208 if(native_pwiz_spectrum_sp.get() == nullptr)
209 {
210 throw ExceptionNotFound(
211 QObject::tr(
212 "Pwiz spectrum index %1 not found in MS file : null pointer")
213 .arg(spectrum_index));
214 }
215
216 return native_pwiz_spectrum_sp;
217}
218
219
220bool
222 pwiz::msdata::Spectrum *spectrum_p,
223 QualifiedMassSpectrum &qualified_mass_spectrum) const
224{
225
226 // We now have to set the retention time at which this mass spectrum
227 // was acquired. This is the scan start time.
228
229 if(!spectrum_p->scanList.scans[0].hasCVParam(
230 pwiz::msdata::MS_scan_start_time))
231 {
232 if(mcsp_msRunId.get()->getMzFormat() == MzFormat::MGF)
233 { // MGF could not have scan start time
234 qualified_mass_spectrum.setRtInSeconds(-1);
235 }
236 else
237 {
239 "The spectrum has no scan start time value set."));
240 }
241 }
242 else
243 {
244 pwiz::data::CVParam retention_time_cv_param =
245 spectrum_p->scanList.scans[0].cvParam(pwiz::msdata::MS_scan_start_time);
246
247 // Try to get the units of the retention time value.
248
249 std::string unit_name = retention_time_cv_param.unitsName();
250 // qDebug() << __FILE__ << "@" << __LINE__ << __FUNCTION__ << "()"
251 //<< "Unit name for the retention time:"
252 //<< QString::fromStdString(unit_name);
253
254 if(unit_name == "second")
255 {
256 qualified_mass_spectrum.setRtInSeconds(
257 retention_time_cv_param.valueAs<double>());
258 }
259 else if(unit_name == "minute")
260 {
261 qualified_mass_spectrum.setRtInSeconds(
262 retention_time_cv_param.valueAs<double>() * 60);
263 }
264 else
265 throw(
266 ExceptionNotPossible("Could not determine the unit for the "
267 "scan start time value."));
268 }
269
270 // qDebug() << __FILE__ << "@" << __LINE__ << __FUNCTION__ << "()"
271 //<< "Retention time for spectrum is:"
272 //<< qualified_mass_spectrum.getRtInSeconds();
273
274 // Old version not checking unit (by default unit is minutes for RT,
275 // not seconds)
276 //
277 // pappso_double retentionTime =
278 // QString(spectrum_p->scanList.scans[0]
279 //.cvParam(pwiz::msdata::MS_scan_start_time)
280 //.value.c_str())
281 //.toDouble();
282 // qualified_mass_spectrum.setRtInSeconds(retentionTime);
283
284 return true;
285}
286
287
288bool
290 pwiz::msdata::Spectrum *spectrum_p,
291 QualifiedMassSpectrum &qualified_mass_spectrum) const
292{
293 // Not all the acquisitions have ion mobility data. We need to test
294 // that:
295
296 if(spectrum_p->scanList.scans[0].hasCVParam(
297 pwiz::msdata::MS_ion_mobility_drift_time))
298 {
299
300 // qDebug() << "as strings:"
301 //<< QString::fromStdString(
302 // spectrum_p->scanList.scans[0]
303 //.cvParam(pwiz::msdata::MS_ion_mobility_drift_time)
304 //.valueAs<std::string>());
305
306 pappso_double driftTime =
307 spectrum_p->scanList.scans[0]
308 .cvParam(pwiz::msdata::MS_ion_mobility_drift_time)
309 .valueAs<double>();
310
311 // qDebug() << "driftTime:" << driftTime;
312
313 // Old version requiring use of QString.
314 // pappso_double driftTime =
315 // QString(spectrum_p->scanList.scans[0]
316 //.cvParam(pwiz::msdata::MS_ion_mobility_drift_time)
317 //.value.c_str())
318 //.toDouble();
319
320 // Now make positively sure that the obtained value is correct.
321 // Note that I suffered a lot with Waters Synapt data that
322 // contained apparently correct drift time XML element that in
323 // fact contained either NaN or inf. When such mass spectra were
324 // encountered, the mz,i data were bogus and crashed the data
325 // loading functions. We just want to skip this kind of bogus mass
326 // spectrum by letting the caller know that the drift time was
327 // bogus ("I" is Filippo Rusconi).
328
329 if(std::isnan(driftTime) || std::isinf(driftTime))
330 {
331 // qDebug() << "detected as nan or inf.";
332
333 return false;
334 }
335 else
336 {
337 // The mzML standard stipulates that drift times are in
338 // milliseconds.
339 qualified_mass_spectrum.setDtInMilliSeconds(driftTime);
340 }
341 }
342 // End of
343 // if(spectrum_p->scanList.scans[0].hasCVParam(
344 // pwiz::msdata::MS_ion_mobility_drift_time))
345 else
346 {
347 // Not a bogus mass spectrum but also not a drift spectrum, set -1
348 // as the drift time value.
349 qualified_mass_spectrum.setDtInMilliSeconds(-1);
350 }
351
352 return true;
353}
354
355
358 const MassSpectrumId &massSpectrumId,
359 pwiz::msdata::Spectrum *spectrum_p,
360 bool want_binary_data,
361 bool &ok) const
362{
363 // qDebug();
364
365 std::string env;
366 env = setlocale(LC_ALL, "");
367 setlocale(LC_ALL, "C");
368
369 QualifiedMassSpectrum qualified_mass_spectrum(massSpectrumId);
370
371 try
372 {
373
374 // We want to store the ms level for this spectrum
375
376 int msLevel =
377 (spectrum_p->cvParam(pwiz::msdata::MS_ms_level).valueAs<int>());
378
379 qualified_mass_spectrum.setMsLevel(msLevel);
380
381 // We want to know if this spectrum is a fragmentation spectrum obtained
382 // from a selected precursor ion.
383
384 std::size_t precursor_list_size = spectrum_p->precursors.size();
385
386 // qDebug() << "For spectrum at index:" <<
387 // massSpectrumId.getSpectrumIndex()
388 //<< "msLevel:" << msLevel
389 //<< "with number of precursors:" << precursor_list_size;
390
391 if(precursor_list_size > 0)
392 {
393
394 // Sanity check
395 if(msLevel < 2)
396 {
397 qDebug() << "Going to throw: msLevel cannot be less than two for "
398 "a spectrum that has items in its Precursor list.";
399
401 "msLevel cannot be less than two for "
402 "a spectrum that has items in its Precursor list."));
403 }
404
405 // See what is the first precursor in the list.
406
407 for(auto &precursor : spectrum_p->precursors)
408 {
409
410 // Set this variable ready as we need that default value in
411 // certain circumstances.
412
413 std::size_t precursor_spectrum_index =
414 std::numeric_limits<std::size_t>::max();
415
416 // The spectrum ID of the precursor might be empty.
417
418 if(precursor.spectrumID.empty())
419 {
420 // qDebug() << "The precursor's spectrum ID is empty.";
421
422 if(mcsp_msRunId.get()->getMzFormat() == MzFormat::MGF)
423 {
424 // qDebug()
425 //<< "Format is MGF, precursor's spectrum ID can be
426 // empty.";
427 }
428 else
429 {
430 // When performing Lumos Fusion fragmentation experiments
431 // in Tune mode and with recording, the first spectrum of
432 // the list is a fragmentation spectrum (ms level 2) that
433 // has no identity for the precursor spectrum because
434 // there is no full scan accquisition.
435 }
436 }
437 // End of
438 // if(precursor.spectrumID.empty())
439 else
440 {
441 // We could get a native precursor spectrum id, so convert
442 // that native id to a spectrum index.
443
444 qualified_mass_spectrum.setPrecursorNativeId(
445 QString::fromStdString(precursor.spectrumID));
446
447 if(qualified_mass_spectrum.getPrecursorNativeId().isEmpty())
448 {
449 // qDebug() << "The native id of the precursor spectrum is
450 // empty.";
451 }
452
453 // Get the spectrum index of the spectrum that contained the
454 // precursor ion.
455
456 precursor_spectrum_index =
457 msp_msData->run.spectrumListPtr->find(precursor.spectrumID);
458
459 // Note that the Mascot MGF format has a peculiar handling of
460 // the precursor ion stuff so we cannot throw.
461 if(precursor_spectrum_index ==
462 msp_msData->run.spectrumListPtr->size())
463 {
464 if(mcsp_msRunId.get()->getMzFormat() != MzFormat::MGF)
465 {
467 "Failed to find the index of the "
468 "precursor ion's spectrum."));
469 }
470 }
471
472 qualified_mass_spectrum.setPrecursorSpectrumIndex(
473 precursor_spectrum_index);
474
475 // qDebug() << __FILE__ << "@" << __LINE__ << __FUNCTION__ <<
476 // "()"
477 //<< "Set the precursor spectrum index to:"
478 //<< qualified_mass_spectrum.getPrecursorSpectrumIndex()
479 //<< "for qualified mass spectrum:"
480 //<< &qualified_mass_spectrum;
481 }
482
483 if(!precursor.selectedIons.size())
484 {
485 qDebug()
486 << "Going to throw The spectrum has msLevel > 1 but the "
487 "precursor ions's selected ions list is empty..";
488
489 throw(
490 ExceptionNotPossible("The spectrum has msLevel > 1 but the "
491 "precursor ions's selected ions "
492 "list is empty."));
493 }
494
495 pwiz::msdata::SelectedIon &ion =
496 *(precursor.selectedIons.begin());
497
498 // selected ion m/z
499
500 pappso_double selected_ion_mz =
501 QString(
502 ion.cvParam(pwiz::cv::MS_selected_ion_m_z).value.c_str())
503 .toDouble();
504
505 // selected ion peak intensity
506
507 pappso_double selected_ion_peak_intensity =
508 QString(ion.cvParam(pwiz::cv::MS_peak_intensity).value.c_str())
509 .toDouble();
510
511 // charge state
512
513 unsigned int selected_ion_charge_state =
514 QString(ion.cvParam(pwiz::cv::MS_charge_state).value.c_str())
515 .toUInt();
516
517 // At this point we can craft a new PrecursorIonData instance and
518 // push it back to the vector.
519
520 PrecursorIonData precursor_ion_data(selected_ion_mz,
521 selected_ion_charge_state,
522 selected_ion_peak_intensity);
523
524 qualified_mass_spectrum.appendPrecursorIonData(
525 precursor_ion_data);
526
527 // General sum-up
528
529 // qDebug()
530 //<< "Appended new PrecursorIonData:"
531 //<< "mz:"
532 //<< qualified_mass_spectrum.getPrecursorIonData().back().mz
533 //<< "charge:"
534 //<< qualified_mass_spectrum.getPrecursorIonData().back().charge
535 //<< "intensity:"
536 //<< qualified_mass_spectrum.getPrecursorIonData()
537 //.back()
538 //.intensity;
539 }
540 // End of
541 // for(auto &precursor : spectrum_p->precursors)
542 }
543 // End of
544 // if(precursor_list_size > 0)
545 else
546 {
547 // Sanity check
548
549 // Unfortunately, logic here is defeated by some vendors that have
550 // files with MS2 spectra without <precursorList>. Thus we have
551 // spectrum_p->precursors.size() == 0 and msLevel > 1.
552
553 // if(msLevel != 1)
554 //{
555 // throw(
556 // ExceptionNotPossible("msLevel cannot be different than 1 if "
557 //"there is not a single precursor ion."));
558 //}
559 }
560
561 // Sanity check.
562
563 if(precursor_list_size !=
564 qualified_mass_spectrum.getPrecursorIonData().size())
565 {
566 qDebug() << "Going to throw The number of precursors in the file is "
567 "different from the number of precursors in memory.";
568
570 QObject::tr("The number of precursors in the file is different "
571 "from the number of precursors in memory."));
572 }
573
574 // if(precursor_list_size == 1)
575 //{
576 // qDebug() << "Trying to get the mz value of the unique precursor ion:"
577 //<< qualified_mass_spectrum.getPrecursorMz();
578 //}
579
580 processRetentionTime(spectrum_p, qualified_mass_spectrum);
581
582 processDriftTime(spectrum_p, qualified_mass_spectrum);
583
584 // for(pwiz::data::CVParam cv_param : ion.cvParams)
585 //{
586 // pwiz::msdata::CVID param_id = cv_param.cvid;
587 // qDebug() << param_id;
588 // qDebug() << cv_param.cvid.c_str();
589 // qDebug() << cv_param.name().c_str();
590 // qDebug() << cv_param.value.c_str();
591 //}
592
593 if(want_binary_data)
594 {
595
596 // Fill-in MZIntensityPair vector for convenient access to binary
597 // data
598
599 std::vector<pwiz::msdata::MZIntensityPair> pairs;
600 spectrum_p->getMZIntensityPairs(pairs);
601
602 MassSpectrum spectrum;
603 double tic = 0;
604 // std::size_t iterCount = 0;
605
606 // Iterate through the m/z-intensity pairs
607 for(std::vector<pwiz::msdata::MZIntensityPair>::const_iterator
608 it = pairs.begin(),
609 end = pairs.end();
610 it != end;
611 ++it)
612 {
613 //++iterCount;
614
615 // qDebug() << "it->mz " << it->mz << " it->intensity" <<
616 // it->intensity;
617 if(it->intensity)
618 {
619 spectrum.push_back(DataPoint(it->mz, it->intensity));
620 tic += it->intensity;
621 }
622 }
623
624 if(mcsp_msRunId.get()->getMzFormat() == MzFormat::MGF)
625 {
626 // Sort peaks by mz
627 spectrum.sortMz();
628 }
629
630 // lc = localeconv ();
631 // qDebug() << " env=" << localeconv () << " lc->decimal_point "
632 // << lc->decimal_point;
633 // qDebug() << __FILE__ << "@" << __LINE__ << __FUNCTION__ << "()
634 // "<< spectrum.size();
635 MassSpectrumSPtr spectrum_sp = spectrum.makeMassSpectrumSPtr();
636 qualified_mass_spectrum.setMassSpectrumSPtr(spectrum_sp);
637
638 // double sumY =
639 // qualified_mass_spectrum.getMassSpectrumSPtr()->sumY(); qDebug()
640 // <<
641 // __FILE__ << "@" << __LINE__ << __FUNCTION__ << "()"
642 //<< "iterCount:" << iterCount << "Spectrum size "
643 //<< spectrum.size() << "with tic:" << tic
644 //<< "and sumY:" << sumY;
645 }
646 else
647 qualified_mass_spectrum.setMassSpectrumSPtr(nullptr);
648 }
649 catch(PappsoException &errorp)
650 {
651 qDebug() << "Going to throw";
652
654 QObject::tr("Error reading data using the proteowizard library: %1")
655 .arg(errorp.qwhat()));
656 }
657 catch(std::exception &error)
658 {
659 qDebug() << "Going to throw";
660
662 QObject::tr("Error reading data using the proteowizard library: %1")
663 .arg(error.what()));
664 }
665
666 // setlocale(LC_ALL, env.c_str());
667
668 ok = true;
669
670 // qDebug() << "QualifiedMassSpectrum: " <<
671 // qualified_mass_spectrum.toString();
672 return qualified_mass_spectrum;
673}
674
675
678 bool want_binary_data,
679 bool &ok) const
680{
681
682 std::string env;
683 env = setlocale(LC_ALL, "");
684 // struct lconv *lc = localeconv();
685
686 // qDebug() << __FILE__ << " " << __FUNCTION__ << " " << __LINE__
687 //<< "env=" << env.c_str()
688 //<< "lc->decimal_point:" << lc->decimal_point;
689
690 setlocale(LC_ALL, "C");
691
692 MassSpectrumId massSpectrumId(mcsp_msRunId);
693
694 if(msp_msData == nullptr)
695 {
696 setlocale(LC_ALL, env.c_str());
697 return (QualifiedMassSpectrum(massSpectrumId));
698 }
699
700 // const bool want_binary_data = true;
701
702 pwiz::msdata::SpectrumListPtr spectrum_list_p =
703 msp_msData->run.spectrumListPtr;
704
705 if(spectrum_index == spectrum_list_p.get()->size())
706 {
707 setlocale(LC_ALL, env.c_str());
708 throw ExceptionNotFound(
709 QObject::tr("The spectrum index cannot be equal to the size of the "
710 "spectrum list."));
711 }
712
713 // At this point we know the spectrum index might be sane, so store it in
714 // the mass spec id object.
715 massSpectrumId.setSpectrumIndex(spectrum_index);
716
717 pwiz::msdata::SpectrumPtr native_pwiz_spectrum_sp =
718 getPwizSpectrumPtr(spectrum_list_p.get(), spectrum_index, want_binary_data);
719
720 setlocale(LC_ALL, env.c_str());
721
722 massSpectrumId.setNativeId(
723 QString::fromStdString(native_pwiz_spectrum_sp->id));
724
726 massSpectrumId, native_pwiz_spectrum_sp.get(), want_binary_data, ok);
727}
728
729
730bool
731PwizMsRunReader::accept(const QString &file_name) const
732{
733 // We want to know if we can handle the file_name.
734 pwiz::msdata::ReaderList reader_list;
735
736 std::string reader_type = reader_list.identify(file_name.toStdString());
737
738 if(!reader_type.empty())
739 return true;
740
741 return false;
742}
743
744
746PwizMsRunReader::massSpectrumSPtr(std::size_t spectrum_index)
747{
748 // qDebug() << __FILE__ << " " << __FUNCTION__ << " " << __LINE__;
749 return qualifiedMassSpectrum(spectrum_index, true).getMassSpectrumSPtr();
750}
751
753PwizMsRunReader::massSpectrumCstSPtr(std::size_t spectrum_index)
754{
755 // qDebug() << __FILE__ << " " << __FUNCTION__ << " " << __LINE__;
756 return qualifiedMassSpectrum(spectrum_index, true).getMassSpectrumCstSPtr();
757}
758
760PwizMsRunReader::qualifiedMassSpectrum(std::size_t spectrum_index,
761 bool want_binary_data) const
762{
763
764 QualifiedMassSpectrum spectrum;
765 bool ok = false;
766
767 spectrum =
768 qualifiedMassSpectrumFromPwizMSData(spectrum_index, want_binary_data, ok);
769
770 if(mcsp_msRunId->getMzFormat() == pappso::MzFormat::MGF)
771 {
772 if(spectrum.getRtInSeconds() == 0)
773 {
774 // spectrum = qualifiedMassSpectrumFromPwizMSData(scan_num - 1);
775 }
776 }
777
778 // if(!ok)
779 // qDebug() << "Encountered a mass spectrum for which the status is bad.";
780
781 return spectrum;
782}
783
784
785void
788{
790}
791
792void
794 [[maybe_unused]] const MsRunReadConfig &config,
796{
798}
799
800void
802 SpectrumCollectionHandlerInterface &handler, unsigned int ms_level)
803{
804
806 // qDebug() << __FILE__ << "@" << __LINE__ << __FUNCTION__ << "()";
807
808 // We want to iterate in the pwiz-spectrum-list and for each pwiz-spectrum
809 // create a pappso-spectrum (QualifiedMassSpectrum). Once the pappso mass
810 // spectrum has been fully qualified (that is, the member data have been
811 // set), it is transferred to the handler passed as parameter to this
812 // function for the consumer to do what it wants with it.
813
814 // Does the handler consuming the mass spectra read from file want these
815 // mass spectra to hold the binary data arrays (mz/i vectors)?
816
817 const bool want_binary_data = handler.needPeakList();
818
819
820 std::string env;
821 env = setlocale(LC_ALL, "");
822 setlocale(LC_ALL, "C");
823
824
825 // We access the pwiz-mass-spectra via the spectrumListPtr that sits in the
826 // run member of msp_msData.
827
828 pwiz::msdata::SpectrumListPtr spectrum_list_p =
829 msp_msData->run.spectrumListPtr;
830
831 // We'll need it to perform the looping in the spectrum list.
832 std::size_t spectrum_list_size = spectrum_list_p.get()->size();
833
834 // qDebug() << "The spectrum list has size:" << spectrum_list_size;
835
836 // Inform the handler of the spectrum list so that it can handle feedback to
837 // the user.
838 handler.spectrumListHasSize(spectrum_list_size);
839
840 // Iterate in the full list of spectra.
841
842 for(std::size_t iter = 0; iter < spectrum_list_size; iter++)
843 {
844
845 // If the user of this reader instance wants to stop reading the
846 // spectra, then break this loop.
847 if(handler.shouldStop())
848 {
849 qDebug() << "The operation was cancelled. Breaking the loop.";
850 break;
851 }
852
853 // Get the native pwiz-spectrum from the spectrum list.
854 // Note that this pointer is a shared pointer from pwiz.
855
856 pwiz::msdata::SpectrumPtr native_pwiz_spectrum_sp =
857 getPwizSpectrumPtr(spectrum_list_p.get(), iter, want_binary_data);
858
859 /*
860 * we want to load metadata of the spectrum even if it does not contain
861 peaks
862
863 * if(!native_pwiz_spectrum_sp->hasBinaryData())
864 {
865 // qDebug() << __FILE__ << "@" << __LINE__ << __FUNCTION__ <<
866 "
867 ()"
868 //<< "native pwiz spectrum is empty, continuing.";
869 continue;
870 }
871 */
872
873 // Instantiate the mass spectrum id that will hold critical information
874 // like the the native id string and the spectrum index.
875
876 MassSpectrumId massSpectrumId(mcsp_msRunId, iter /* spectrum index*/);
877
878 // Get the spectrum native id as a QString to store it in the mass
879 // spectrum id class. This is will allow later to refer to the same
880 // spectrum starting back from the file.
881
882 QString native_id = QString::fromStdString(native_pwiz_spectrum_sp->id);
883 massSpectrumId.setNativeId(native_id);
884
885 // Finally, instantiate the qualified mass spectrum with its id. This
886 // function will continue performing pappso-spectrum detailed
887 // qualification.
888
889 bool ok = false;
890
891 QualifiedMassSpectrum qualified_mass_spectrum =
893 massSpectrumId, native_pwiz_spectrum_sp.get(), want_binary_data, ok);
894
895 if(!ok)
896 {
897 // qDebug() << "Encountered a mass spectrum for which the returned "
898 //"status is bad.";
899 continue;
900 }
901
902 // Before handing the mass spectrum out to the handler, see if the
903 // native mass spectrum was empty or not.
904
905 // if(!native_pwiz_spectrum_sp->defaultArrayLength)
906 // qDebug() << "The mass spectrum has not defaultArrayLength";
907
908 qualified_mass_spectrum.setEmptyMassSpectrum(
909 !native_pwiz_spectrum_sp->defaultArrayLength);
910
911 // The handler will receive the index of the mass spectrum in the
912 // current run via the mass spectrum id member datum.
913 if(ms_level == 0)
914 {
915 handler.setQualifiedMassSpectrum(qualified_mass_spectrum);
916 }
917 else
918 {
919 if(qualified_mass_spectrum.getMsLevel() == ms_level)
920 {
921 handler.setQualifiedMassSpectrum(qualified_mass_spectrum);
922 }
923 }
924 }
925
926 setlocale(LC_ALL, env.c_str());
927 // End of
928 // for(std::size_t iter = 0; iter < spectrum_list_size; iter++)
929
930 // Now let the loading handler know that the loading of the data has ended.
931 // The handler might need this "signal" to perform additional tasks or to
932 // cleanup cruft.
933
934 // qDebug() << "Loading ended";
935 handler.loadingEnded();
936}
937
938std::size_t
940{
941 return msp_msData->run.spectrumListPtr.get()->size();
942}
943
944bool
946{
947 return m_hasScanNumbers;
948}
949
950bool
952{
953 msp_msData = nullptr;
954 return true;
955}
956
957bool
959{
960 if(msp_msData == nullptr)
961 {
962 initialize();
963 }
964 return true;
965}
966
967
970 std::size_t spectrum_index, pappso::PrecisionPtr precision) const
971{
972
973 QualifiedMassSpectrum mass_spectrum =
974 qualifiedMassSpectrum(spectrum_index, false);
975
976 return newXicCoordSPtrFromQualifiedMassSpectrum(mass_spectrum, precision);
977}
978
981 const pappso::QualifiedMassSpectrum &mass_spectrum,
982 pappso::PrecisionPtr precision) const
983{
984 XicCoordSPtr xic_coord = std::make_shared<XicCoord>();
985
986 xic_coord.get()->rtTarget = mass_spectrum.getRtInSeconds();
987
988 xic_coord.get()->mzRange = MzRange(mass_spectrum.getPrecursorMz(), precision);
989
990 return xic_coord;
991}
992
993} // namespace pappso
void setNativeId(const QString &native_id)
void setSpectrumIndex(std::size_t index)
Class to represent a mass spectrum.
Definition: massspectrum.h:71
void sortMz()
Sort the DataPoint instances of this spectrum.
MassSpectrumSPtr makeMassSpectrumSPtr() const
base class to read MSrun the only way to build a MsRunReader object is to use the MsRunReaderFactory
Definition: msrunreader.h:63
MsRunIdCstSPtr mcsp_msRunId
Definition: msrunreader.h:175
virtual const QString & qwhat() const
virtual pappso::XicCoordSPtr newXicCoordSPtrFromSpectrumIndex(std::size_t spectrum_index, pappso::PrecisionPtr precision) const override
get a xic coordinate object from a given spectrum index
pwiz::msdata::MSDataPtr msp_msData
virtual void readSpectrumCollectionByMsLevel(SpectrumCollectionHandlerInterface &handler, unsigned int ms_level) override
function to visit an MsRunReader and get each Spectrum in a spectrum collection handler by Ms Levels
bool processDriftTime(pwiz::msdata::Spectrum *spectrum_p, QualifiedMassSpectrum &qualified_mass_spectrum) const
virtual MassSpectrumCstSPtr massSpectrumCstSPtr(std::size_t spectrum_index) override
QualifiedMassSpectrum qualifiedMassSpectrumFromPwizMSData(std::size_t spectrum_index, bool want_binary_data, bool &ok) const
PwizMsRunReader(MsRunIdCstSPtr &msrun_id_csp)
QualifiedMassSpectrum qualifiedMassSpectrumFromPwizSpectrumPtr(const MassSpectrumId &massSpectrumId, pwiz::msdata::Spectrum *spectrum_p, bool want_binary_data, bool &ok) const
virtual void readSpectrumCollection2(const MsRunReadConfig &config, SpectrumCollectionHandlerInterface &handler) override
virtual QualifiedMassSpectrum qualifiedMassSpectrum(std::size_t spectrum_index, bool want_binary_data=true) const override
get a QualifiedMassSpectrum class given its scan number
virtual bool hasScanNumbers() const override
tells if spectra can be accessed using scan numbers by default, it returns false. Only overrided func...
bool processRetentionTime(pwiz::msdata::Spectrum *spectrum_p, QualifiedMassSpectrum &qualified_mass_spectrum) const
virtual bool acquireDevice() override
acquire data back end device
virtual void initialize() override
virtual void readSpectrumCollection(SpectrumCollectionHandlerInterface &handler) override
function to visit an MsRunReader and get each Spectrum in a spectrum collection handler
virtual bool accept(const QString &file_name) const override
tells if the reader is able to handle this file must be implemented by private MS run reader,...
virtual std::size_t spectrumListSize() const override
get the total number of spectra contained in the MSrun data file
pwiz::msdata::SpectrumPtr getPwizSpectrumPtr(pwiz::msdata::SpectrumList *p_spectrum_list, std::size_t spectrum_index, bool want_binary_data) const
virtual bool releaseDevice() override
release data back end device if a the data back end is released, the developper has to use acquireDev...
virtual pappso::XicCoordSPtr newXicCoordSPtrFromQualifiedMassSpectrum(const pappso::QualifiedMassSpectrum &mass_spectrum, pappso::PrecisionPtr precision) const override
get a xic coordinate object from a given spectrum
virtual MassSpectrumSPtr massSpectrumSPtr(std::size_t spectrum_index) override
get a MassSpectrumSPtr class given its spectrum index
Class representing a fully specified mass spectrum.
uint getMsLevel() const
Get the mass spectrum level.
MassSpectrumCstSPtr getMassSpectrumCstSPtr() const
Get the MassSpectrumCstSPtr.
void setPrecursorNativeId(const QString &native_id)
Set the scan native id of the precursor ion.
const std::vector< PrecursorIonData > & getPrecursorIonData() const
void setDtInMilliSeconds(pappso_double rt)
Set the drift time in milliseconds.
const QString & getPrecursorNativeId() const
void appendPrecursorIonData(const PrecursorIonData &precursor_ion_data)
void setMsLevel(uint ms_level)
Set the mass spectrum level.
void setPrecursorSpectrumIndex(std::size_t precursor_scan_num)
Set the scan number of the precursor ion.
pappso_double getPrecursorMz(bool *ok=nullptr) const
Get the precursor m/z ratio.
MassSpectrumSPtr getMassSpectrumSPtr() const
Get the MassSpectrumSPtr.
void setMassSpectrumSPtr(MassSpectrumSPtr massSpectrum)
Set the MassSpectrumSPtr.
void setRtInSeconds(pappso_double rt)
Set the retention time in seconds.
pappso_double getRtInSeconds() const
Get the retention time in seconds.
void setEmptyMassSpectrum(bool is_empty_mass_spectrum)
interface to collect spectrums from the MsRunReader class
virtual bool needPeakList() const =0
tells if we need the peak list (if we want the binary data) for each spectrum
virtual void setQualifiedMassSpectrum(const QualifiedMassSpectrum &spectrum)=0
static std::string toUtf8StandardString(const QString &text)
Definition: utils.cpp:143
tries to keep as much as possible monoisotopes, removing any possible C13 peaks and changes multichar...
Definition: aa.cpp:39
@ MGF
Mascot format.
std::shared_ptr< const MsRunId > MsRunIdCstSPtr
Definition: msrunid.h:46
double pappso_double
A type definition for doubles.
Definition: types.h:50
std::shared_ptr< const MassSpectrum > MassSpectrumCstSPtr
Definition: massspectrum.h:55
std::shared_ptr< MassSpectrum > MassSpectrumSPtr
Definition: massspectrum.h:54
std::shared_ptr< XicCoord > XicCoordSPtr
Definition: xiccoord.h:43
MSrun file reader based on the proteowizard library.