Source code for altamisa.isatab.write_investigation

# -*- coding: utf-8 -*-
"""Code for parsing investigation files.
"""

from __future__ import generator_stop
import csv
import os
from typing import Dict, List, TextIO
import warnings

from ..exceptions import WriteIsatabException, WriteIsatabWarning
from ..constants import investigation_headers
from .helpers import is_ontology_term_ref
from . import models


__author__ = (
    "Manuel Holtgrewe <manuel.holtgrewe@bihealth.de>, "
    "Mathias Kuhring <mathias.kuhring@bihealth.de>"
)


# Helper to extract comments and align them into rows
def _extract_comments(section_objects: list):
    """Collect the comments of all section entries into aligned rows.

    Returns a dict mapping each comment name (in sorted order) to a list with
    one cell per entry in ``section_objects``; entries that do not carry a
    comment of that name get an empty string in their position.
    """
    width = len(section_objects)

    # First pass: gather every distinct comment name used by any entry
    seen = set()
    for entry in section_objects:
        for item in entry.comments:
            seen.add(item.name)

    # One row per comment name, pre-filled with empty cells
    rows = {}
    for label in sorted(seen):
        rows[label] = [""] * width

    # Second pass: place each comment value into the column of its entry
    for column, entry in enumerate(section_objects):
        for item in entry.comments:
            rows[item.name][column] = item.value
    return rows


# Helper to extract a section header
def _extract_section_header(first_entry, section_name):
    """
    Return the reference headers stored on the first entry (column) of a section.

    All entries of a section are assumed to share the same headers, so only the
    first one is consulted. If there is no first entry, or it has no headers, a
    ``WriteIsatabWarning`` is issued and ``None`` is returned.
    """
    # TODO: check that headers and attributes match
    if not first_entry or not first_entry.headers:
        warnings.warn(
            "No reference headers available for section {}. Applying default order.".format(
                section_name
            ),
            WriteIsatabWarning,
        )
        return None
    return first_entry.headers


# Helper to create a dict with keys to empty lists
def _init_multi_column_section(section_keys) -> dict:
    """Return a dict mapping every key in ``section_keys`` to its own empty list."""
    section = {}
    for key in section_keys:
        # A fresh list per key, so the rows never share state
        section[key] = []
    return section


class InvestigationWriter:
    """
    Main class to write an investigation file from an ``InvestigationInfo`` object.

    :type investigation: models.InvestigationInfo
    :param investigation: The investigation model to write
    :type output_file: TextIO
    :param output_file: Output ISA-Tab investigation file
    :type quote: str
    :param quote: Optional quoting character (none by default)
    :type lineterminator: str
    :param lineterminator: Optional line terminator (OS specific by default)
    """

    @classmethod
    def from_stream(
        cls,
        investigation: models.InvestigationInfo,
        output_file: TextIO,
        quote=None,
        lineterminator=None,
    ):
        """Construct from file-like object"""
        # Build through ``cls`` so that subclasses get an instance of their own type
        return cls(investigation, output_file, quote, lineterminator)

    def __init__(
        self,
        investigation: models.InvestigationInfo,
        output_file: TextIO,
        quote=None,
        lineterminator=None,
    ):
        # Investigation model
        self.investigation = investigation
        # Investigation output file
        self.output_file = output_file
        # Quote for csv export
        self.quote = quote
        # Csv file writer
        self._writer = csv.writer(
            output_file,
            delimiter="\t",
            lineterminator=lineterminator or os.linesep,
            quoting=csv.QUOTE_NONE,
            # Can't use no quoting without escaping, so use different dummy quote here
            escapechar="\\",
            quotechar="|",
        )

    def write(self):
        """Write investigation file"""
        self._write_ontology_source_reference()
        self._write_basic_info()
        self._write_publications()
        self._write_contacts()
        self._write_studies()

    def _write_line(self, header, values):
        # Write an investigation line with header and values (potentially quoted)
        if self.quote:
            tpl = "".join((self.quote, "{}", self.quote))
            values = [tpl.format(v) for v in values]
        self._writer.writerow((header, *values))

    # Writer for headers and content of sections
    def _write_section(
        self,
        section_name: str,
        section: Dict[str, list],
        comments: Dict[str, list],
        headers: List[str] = None,
    ):
        # Add comments to section dict (note: this extends the passed-in ``section``)
        if comments:
            for key, value in comments.items():
                section["Comment[{}]".format(key)] = value
        # Write the section name
        self._writer.writerow((section_name,))
        # Write the lines in this section.
        if headers:
            # Use header order
            self._write_section_by_header_order(headers, section, section_name)
        else:
            # Use dict order
            for header, values in section.items():
                self._write_line(header, values)

    def _write_section_by_header_order(self, headers, section, section_name):
        # Write section based on header order; rows are popped from ``section`` as
        # they are written so that anything left over afterwards can be detected
        for header in headers:
            if header in section:
                values = section.pop(header)
                self._write_line(header, values)
            else:  # pragma: no cover
                tpl = "No data found for header {} in section {}"
                msg = tpl.format(header, section_name)
                raise WriteIsatabException(msg)
        if section:  # pragma: no cover
            tpl = "Leftover rows found in section {}:\n{}"
            msg = tpl.format(section_name, section)
            raise WriteIsatabException(msg)

    @staticmethod
    def _term_columns(value):
        # Split a value into its (name, accession, source ref) cells. Ontology term
        # references contribute their three attributes (missing ones become "");
        # any other value is written as-is with empty accession and source ref.
        if is_ontology_term_ref(value):
            return value.name or "", value.accession or "", value.ontology_name or ""
        return value, "", ""

    @classmethod
    def _append_term(cls, section, name_key, accession_key, source_ref_key, value):
        # Append the three cells of ``value`` to the corresponding section rows
        name, accession, source_ref = cls._term_columns(value)
        section[name_key].append(name)
        section[accession_key].append(accession)
        section[source_ref_key].append(source_ref)

    def _write_ontology_source_reference(self):
        # Write ONTOLOGY SOURCE REFERENCE section
        section = _init_multi_column_section(investigation_headers.ONTOLOGY_SOURCE_REF_KEYS)
        ontology_refs = self.investigation.ontology_source_refs
        for ontology_ref in ontology_refs.values():
            section[investigation_headers.TERM_SOURCE_NAME].append(ontology_ref.name)
            section[investigation_headers.TERM_SOURCE_FILE].append(ontology_ref.file)
            section[investigation_headers.TERM_SOURCE_VERSION].append(ontology_ref.version)
            section[investigation_headers.TERM_SOURCE_DESCRIPTION].append(ontology_ref.description)
        comments = _extract_comments(ontology_refs.values())
        headers = _extract_section_header(
            next(iter(ontology_refs.values()), None),
            investigation_headers.ONTOLOGY_SOURCE_REFERENCE,
        )
        self._write_section(
            investigation_headers.ONTOLOGY_SOURCE_REFERENCE, section, comments, headers
        )

    def _write_basic_info(self):
        # Write INVESTIGATION section
        basic_info = self.investigation.info
        section = {
            investigation_headers.INVESTIGATION_IDENTIFIER: [basic_info.identifier],
            investigation_headers.INVESTIGATION_TITLE: [basic_info.title],
            investigation_headers.INVESTIGATION_DESCRIPTION: [basic_info.description],
            investigation_headers.INVESTIGATION_SUBMISSION_DATE: [basic_info.submission_date],
            investigation_headers.INVESTIGATION_PUBLIC_RELEASE_DATE: [
                basic_info.public_release_date
            ],
        }
        comments = _extract_comments([basic_info])
        headers = _extract_section_header(basic_info, investigation_headers.INVESTIGATION)
        self._write_section(investigation_headers.INVESTIGATION, section, comments, headers)

    def _write_publications(self):
        # Write INVESTIGATION PUBLICATIONS section
        section = _init_multi_column_section(investigation_headers.INVESTIGATION_PUBLICATIONS_KEYS)
        publications = self.investigation.publications
        for publication in publications:
            section[investigation_headers.INVESTIGATION_PUBMED_ID].append(publication.pubmed_id)
            section[investigation_headers.INVESTIGATION_PUBLICATION_DOI].append(publication.doi)
            section[investigation_headers.INVESTIGATION_PUBLICATION_AUTHOR_LIST].append(
                publication.authors
            )
            section[investigation_headers.INVESTIGATION_PUBLICATION_TITLE].append(publication.title)
            self._append_term(
                section,
                investigation_headers.INVESTIGATION_PUBLICATION_STATUS,
                investigation_headers.INVESTIGATION_PUBLICATION_STATUS_TERM_ACCESSION_NUMBER,
                investigation_headers.INVESTIGATION_PUBLICATION_STATUS_TERM_SOURCE_REF,
                publication.status,
            )
        comments = _extract_comments(publications)
        headers = _extract_section_header(
            next(iter(publications), None),
            investigation_headers.INVESTIGATION_PUBLICATIONS,
        )
        self._write_section(
            investigation_headers.INVESTIGATION_PUBLICATIONS, section, comments, headers
        )

    def _write_contacts(self):
        # Write INVESTIGATION CONTACTS section
        section = _init_multi_column_section(investigation_headers.INVESTIGATION_CONTACTS_KEYS)
        contacts = self.investigation.contacts
        for contact in contacts:
            section[investigation_headers.INVESTIGATION_PERSON_LAST_NAME].append(contact.last_name)
            section[investigation_headers.INVESTIGATION_PERSON_FIRST_NAME].append(
                contact.first_name
            )
            section[investigation_headers.INVESTIGATION_PERSON_MID_INITIALS].append(
                contact.mid_initial
            )
            section[investigation_headers.INVESTIGATION_PERSON_EMAIL].append(contact.email)
            section[investigation_headers.INVESTIGATION_PERSON_PHONE].append(contact.phone)
            section[investigation_headers.INVESTIGATION_PERSON_FAX].append(contact.fax)
            section[investigation_headers.INVESTIGATION_PERSON_ADDRESS].append(contact.address)
            section[investigation_headers.INVESTIGATION_PERSON_AFFILIATION].append(
                contact.affiliation
            )
            self._append_term(
                section,
                investigation_headers.INVESTIGATION_PERSON_ROLES,
                investigation_headers.INVESTIGATION_PERSON_ROLES_TERM_ACCESSION_NUMBER,
                investigation_headers.INVESTIGATION_PERSON_ROLES_TERM_SOURCE_REF,
                contact.role,
            )
        comments = _extract_comments(contacts)
        headers = _extract_section_header(
            next(iter(contacts), None),
            investigation_headers.INVESTIGATION_CONTACTS,
        )
        self._write_section(
            investigation_headers.INVESTIGATION_CONTACTS, section, comments, headers
        )

    def _write_studies(self):
        # Write STUDY sections
        for study in self.investigation.studies:
            self._write_study_basic_info(study)
            self._write_study_design_descriptors(study)
            self._write_study_publications(study)
            self._write_study_factors(study)
            self._write_study_assays(study)
            self._write_study_protocols(study)
            self._write_study_contacts(study)

    def _write_study_basic_info(self, study: models.StudyInfo):
        # Write STUDY INFO section
        basic_info = study.info
        section = {
            investigation_headers.STUDY_IDENTIFIER: [basic_info.identifier],
            investigation_headers.STUDY_TITLE: [basic_info.title],
            investigation_headers.STUDY_DESCRIPTION: [basic_info.description],
            investigation_headers.STUDY_SUBMISSION_DATE: [basic_info.submission_date],
            investigation_headers.STUDY_PUBLIC_RELEASE_DATE: [basic_info.public_release_date],
            investigation_headers.STUDY_FILE_NAME: [basic_info.path or ""],
        }
        comments = _extract_comments([basic_info])
        headers = _extract_section_header(basic_info, investigation_headers.STUDY)
        self._write_section(investigation_headers.STUDY, section, comments, headers)

    def _write_study_design_descriptors(self, study: models.StudyInfo):
        # Write STUDY DESIGN DESCRIPTORS section
        section = _init_multi_column_section(investigation_headers.STUDY_DESIGN_DESCR_KEYS)
        for design in study.designs:
            self._append_term(
                section,
                investigation_headers.STUDY_DESIGN_TYPE,
                investigation_headers.STUDY_DESIGN_TYPE_TERM_ACCESSION_NUMBER,
                investigation_headers.STUDY_DESIGN_TYPE_TERM_SOURCE_REF,
                design.type,
            )
        comments = _extract_comments(study.designs)
        headers = _extract_section_header(
            next(iter(study.designs), None),
            investigation_headers.STUDY_DESIGN_DESCRIPTORS,
        )
        self._write_section(
            investigation_headers.STUDY_DESIGN_DESCRIPTORS, section, comments, headers
        )

    def _write_study_publications(self, study: models.StudyInfo):
        # Write STUDY PUBLICATIONS section
        section = _init_multi_column_section(investigation_headers.STUDY_PUBLICATIONS_KEYS)
        for publication in study.publications:
            section[investigation_headers.STUDY_PUBMED_ID].append(publication.pubmed_id)
            section[investigation_headers.STUDY_PUBLICATION_DOI].append(publication.doi)
            section[investigation_headers.STUDY_PUBLICATION_AUTHOR_LIST].append(publication.authors)
            section[investigation_headers.STUDY_PUBLICATION_TITLE].append(publication.title)
            self._append_term(
                section,
                investigation_headers.STUDY_PUBLICATION_STATUS,
                investigation_headers.STUDY_PUBLICATION_STATUS_TERM_ACCESSION_NUMBER,
                investigation_headers.STUDY_PUBLICATION_STATUS_TERM_SOURCE_REF,
                publication.status,
            )
        comments = _extract_comments(study.publications)
        headers = _extract_section_header(
            next(iter(study.publications), None),
            investigation_headers.STUDY_PUBLICATIONS,
        )
        self._write_section(investigation_headers.STUDY_PUBLICATIONS, section, comments, headers)

    def _write_study_factors(self, study: models.StudyInfo):
        # Write STUDY FACTORS section
        section = _init_multi_column_section(investigation_headers.STUDY_FACTORS_KEYS)
        for factor in study.factors.values():
            section[investigation_headers.STUDY_FACTOR_NAME].append(factor.name)
            # Missing term attributes become "" here as in every other section, so
            # that quoted output never contains a literal "None"
            self._append_term(
                section,
                investigation_headers.STUDY_FACTOR_TYPE,
                investigation_headers.STUDY_FACTOR_TYPE_TERM_ACCESSION_NUMBER,
                investigation_headers.STUDY_FACTOR_TYPE_TERM_SOURCE_REF,
                factor.type,
            )
        comments = _extract_comments(study.factors.values())
        headers = _extract_section_header(
            next(iter(study.factors.values()), None),
            investigation_headers.STUDY_FACTORS,
        )
        self._write_section(investigation_headers.STUDY_FACTORS, section, comments, headers)

    def _write_study_assays(self, study: models.StudyInfo):
        # Write STUDY ASSAYS section
        section = _init_multi_column_section(investigation_headers.STUDY_ASSAYS_KEYS)
        for assay in study.assays:
            section[investigation_headers.STUDY_ASSAY_FILE_NAME].append(assay.path or "")
            self._append_term(
                section,
                investigation_headers.STUDY_ASSAY_MEASUREMENT_TYPE,
                investigation_headers.STUDY_ASSAY_MEASUREMENT_TYPE_TERM_ACCESSION_NUMBER,
                investigation_headers.STUDY_ASSAY_MEASUREMENT_TYPE_TERM_SOURCE_REF,
                assay.measurement_type,
            )
            self._append_term(
                section,
                investigation_headers.STUDY_ASSAY_TECHNOLOGY_TYPE,
                investigation_headers.STUDY_ASSAY_TECHNOLOGY_TYPE_TERM_ACCESSION_NUMBER,
                investigation_headers.STUDY_ASSAY_TECHNOLOGY_TYPE_TERM_SOURCE_REF,
                assay.technology_type,
            )
            section[investigation_headers.STUDY_ASSAY_TECHNOLOGY_PLATFORM].append(assay.platform)
        comments = _extract_comments(study.assays)
        headers = _extract_section_header(
            next(iter(study.assays), None), investigation_headers.STUDY_ASSAYS
        )
        self._write_section(investigation_headers.STUDY_ASSAYS, section, comments, headers)

    def _write_study_protocols(self, study: models.StudyInfo):
        # Write STUDY PROTOCOLS section
        section = _init_multi_column_section(investigation_headers.STUDY_PROTOCOLS_KEYS)
        for protocol in study.protocols.values():
            section[investigation_headers.STUDY_PROTOCOL_NAME].append(protocol.name)
            self._append_term(
                section,
                investigation_headers.STUDY_PROTOCOL_TYPE,
                investigation_headers.STUDY_PROTOCOL_TYPE_TERM_ACCESSION_NUMBER,
                investigation_headers.STUDY_PROTOCOL_TYPE_TERM_SOURCE_REF,
                protocol.type,
            )
            section[investigation_headers.STUDY_PROTOCOL_DESCRIPTION].append(protocol.description)
            section[investigation_headers.STUDY_PROTOCOL_URI].append(protocol.uri)
            section[investigation_headers.STUDY_PROTOCOL_VERSION].append(protocol.version)

            # Parameters: all parameters of one protocol share a single cell per
            # row, joined by ";"
            names = []
            accessions = []
            ontologies = []
            for parameter in protocol.parameters.values():
                if is_ontology_term_ref(parameter):
                    names.append(parameter.name or "")
                    accessions.append(parameter.accession or "")
                    ontologies.append(parameter.ontology_name or "")
                else:
                    # NOTE(review): non-term parameters are read via ``.name`` as
                    # well -- confirm against the model definition
                    names.append(parameter.name)
                    accessions.append("")
                    ontologies.append("")
            section[investigation_headers.STUDY_PROTOCOL_PARAMETERS_NAME].append(";".join(names))
            section[
                investigation_headers.STUDY_PROTOCOL_PARAMETERS_NAME_TERM_ACCESSION_NUMBER
            ].append(";".join(accessions))
            section[investigation_headers.STUDY_PROTOCOL_PARAMETERS_NAME_TERM_SOURCE_REF].append(
                ";".join(ontologies)
            )

            # Components: same ";"-joined layout, with one name, type, accession
            # and source ref per component so that all four lists stay aligned
            names = []
            types = []
            accessions = []
            ontologies = []
            for component in protocol.components.values():
                names.append(component.name)
                type_name, accession, ontology = self._term_columns(component.type)
                # A free-text type belongs in the type column, not the name column
                types.append(type_name)
                accessions.append(accession)
                ontologies.append(ontology)
            section[investigation_headers.STUDY_PROTOCOL_COMPONENTS_NAME].append(";".join(names))
            section[investigation_headers.STUDY_PROTOCOL_COMPONENTS_TYPE].append(";".join(types))
            section[
                investigation_headers.STUDY_PROTOCOL_COMPONENTS_TYPE_TERM_ACCESSION_NUMBER
            ].append(";".join(accessions))
            section[investigation_headers.STUDY_PROTOCOL_COMPONENTS_TYPE_TERM_SOURCE_REF].append(
                ";".join(ontologies)
            )
        comments = _extract_comments(study.protocols.values())
        headers = _extract_section_header(
            next(iter(study.protocols.values()), None),
            investigation_headers.STUDY_PROTOCOLS,
        )
        self._write_section(investigation_headers.STUDY_PROTOCOLS, section, comments, headers)

    def _write_study_contacts(self, study: models.StudyInfo):
        # Write STUDY CONTACTS section
        section = _init_multi_column_section(investigation_headers.STUDY_CONTACTS_KEYS)
        for contact in study.contacts:
            section[investigation_headers.STUDY_PERSON_LAST_NAME].append(contact.last_name)
            section[investigation_headers.STUDY_PERSON_FIRST_NAME].append(contact.first_name)
            section[investigation_headers.STUDY_PERSON_MID_INITIALS].append(contact.mid_initial)
            section[investigation_headers.STUDY_PERSON_EMAIL].append(contact.email)
            section[investigation_headers.STUDY_PERSON_PHONE].append(contact.phone)
            section[investigation_headers.STUDY_PERSON_FAX].append(contact.fax)
            section[investigation_headers.STUDY_PERSON_ADDRESS].append(contact.address)
            section[investigation_headers.STUDY_PERSON_AFFILIATION].append(contact.affiliation)
            self._append_term(
                section,
                investigation_headers.STUDY_PERSON_ROLES,
                investigation_headers.STUDY_PERSON_ROLES_TERM_ACCESSION_NUMBER,
                investigation_headers.STUDY_PERSON_ROLES_TERM_SOURCE_REF,
                contact.role,
            )
        comments = _extract_comments(study.contacts)
        headers = _extract_section_header(
            next(iter(study.contacts), None),
            investigation_headers.STUDY_CONTACTS,
        )
        self._write_section(investigation_headers.STUDY_CONTACTS, section, comments, headers)