<ddi:DDIInstance xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="ddi:instance:3_3 http://ddialliance.org/Specification/DDI-Lifecycle/3.3/XMLSchema/instance.xsd" xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:ddi="ddi:instance:3_3" xmlns:r="ddi:reusable:3_3" xmlns:s="ddi:studyunit:3_3" xmlns:d="ddi:datacollection:3_3" xmlns:a="ddi:archive:3_3" xmlns:c="ddi:conceptualcomponent:3_3" xmlns:cm="ddi:comparative:3_3" xmlns:g="ddi:group:3_3" xmlns:l="ddi:logicalproduct:3_3" xmlns:p="ddi:physicaldataproduct:3_3" xmlns:pi="ddi:physicalinstance:3_3" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:xhtml="http://www.w3.org/1999/xhtml" xmlns:xml="http://www.w3.org/XML/1998/namespace" isMaintainable="true" scopeOfUniqueness="Agency">
  <!-- Identification of this DDI instance: a URN plus the Agency / ID / Version triple. -->
  <!-- NOTE(review): the URN uses the agency segment "se.researchdata" while r:Agency says "SND";
       confirm which agency identifier is intended so that both forms agree. -->
  <r:URN>urn:ddi:se.researchdata:doi-10-17044-scilifelab-24039510:0</r:URN>
  <r:Agency>SND</r:Agency>
  <r:ID>doi-10-17044-scilifelab-24039510</r:ID>
  <r:Version>0</r:Version>
  <!-- Reusable resources for this record: an OtherMaterialScheme that holds only its URN,
       and an OrganizationScheme with the single Individual that the study citation
       points to as creator (URN ending in "Individual-0:2.0"). -->
  <g:ResourcePackage>
    <r:URN>urn:ddi:se.researchdata:doi-10-17044-scilifelab-24039510.ResourcePackage:2.0</r:URN>
    <r:OtherMaterialScheme>
      <r:URN>urn:ddi:se.researchdata:doi-10-17044-scilifelab-24039510.OtherMaterialScheme:2.0</r:URN>
    </r:OtherMaterialScheme>
    <a:OrganizationScheme>
      <r:URN>urn:ddi:se.researchdata:doi-10-17044-scilifelab-24039510.OrganizationScheme-0:2.0</r:URN>
      <a:Individual>
        <r:URN>urn:ddi:se.researchdata:doi-10-17044-scilifelab-24039510.Individual-0:2.0</r:URN>
        <!-- The affiliation is carried as a free-form key/value pair rather than a structured relation. -->
        <r:UserAttributePair>
          <r:AttributeKey>affiliation</r:AttributeKey>
          <r:AttributeValue>Science for Life Laboratory</r:AttributeValue>
        </r:UserAttributePair>
        <a:IndividualIdentification>
          <a:IndividualName>
            <a:FirstGiven>Andreas</a:FirstGiven>
            <a:LastFamily>Wallberg</a:LastFamily>
            <a:FullName>
              <r:String>Andreas Wallberg</r:String>
            </a:FullName>
          </a:IndividualName>
          <!-- Researcher identifier; TypeOfID names the identifier scheme (ORCID). -->
          <a:ResearcherID>
            <a:TypeOfID>ORCID</a:TypeOfID>
            <a:ResearcherIdentification>0000-0002-9081-9663</a:ResearcherIdentification>
          </a:ResearcherID>
        </a:IndividualIdentification>
      </a:Individual>
    </a:OrganizationScheme>
  </g:ResourcePackage>
  <s:StudyUnit>
    <r:URN>urn:ddi:se.researchdata:doi-10-17044-scilifelab-24039510.StudyUnit:2.0</r:URN>
    <r:UserID typeOfUserID="datasetIdentifier">doi-10-17044-scilifelab-24039510</r:UserID>
    <!-- Bibliographic citation of the study unit: title, creator (by URN reference to the
         Individual in the ResourcePackage), publisher, publication date and DOI.
         The publisher is stated once, with its Swedish and English names. -->
    <r:Citation>
      <r:Title>
        <r:String xml:lang="en">3. Comparative population transcriptomics in krill: orthogroups (FASTA, TSV files)</r:String>
      </r:Title>
      <r:Creator>
        <r:CreatorReference>
          <r:URN>urn:ddi:se.researchdata:doi-10-17044-scilifelab-24039510.Individual-0:2.0</r:URN>
          <r:TypeOfObject>Individual</r:TypeOfObject>
        </r:CreatorReference>
      </r:Creator>
      <r:Publisher>
        <r:PublisherName>
          <r:String xml:lang="sv">Uppsala universitet</r:String>
          <r:String xml:lang="en">Uppsala University</r:String>
        </r:PublisherName>
      </r:Publisher>
      <r:PublicationDate>
        <r:SimpleDate>2023-10-19</r:SimpleDate>
      </r:PublicationDate>
      <!-- The DOI of the dataset; ManagingAgency names the identifier scheme. -->
      <r:InternationalIdentifier>
        <r:IdentifierContent>10.17044/SCILIFELAB.24039510</r:IdentifierContent>
        <r:ManagingAgency controlledVocabularyAgencyName="DOI">DOI</r:ManagingAgency>
      </r:InternationalIdentifier>
    </r:Citation>
    <r:Abstract>
      <r:Content xml:lang="en">This item contains a gzipped archive with ~13,000 orthogroups used to study molecular evolution in this project.

Archive:

krill.orthogroups.tar.gz

Contents of archive (FILE,SIZE,SPECIES,SAMPLES,SNPs):

- krill.proteinortho.tsv - the primary output table from Proteinortho. Describes which protein sequences from which species belong to the same orthogroup. Format according to the standard output of the program.
- krill.proteinortho.tsv.seqs.csv - a processed table that also contains the actual sequences line by line (see below).
- the alignments directory, which contains all OGs in unaligned and aligned files in FASTA format (see below).

Format of the krill.proteinortho.tsv.seqs.csv table

The fields are:

- NR = orthogroup number
- ORTHO_GROUP = orthogroup ID
- N_SPECIES = the number of species
- N_GENES = the number of genes/sequences in this orthogroup
- N_MATCHING[o] = number of sequences matching outgroup species for this orthogroup
- N_NON_MATCHING = number of sequences matching ingroup species for this orthogroup
- HEADER = the name of this particular sequence
- SEQ = the protein sequence

Contents of the alignments directory

Each orthogroup is represented by up to four FASTA files:

- OG*.cds.ginsi.fasta.orig = the original, unaligned and unfiltered sequences
- OG*.cds.ginsi.fasta = the aligned and filtered sequences
- OG*.cds.ginsi.fasta.without_cold_euphausia.fasta = the aligned and filtered sequences after removing cold-associated Euphausia species
- OG*.cds.ginsi.fasta.without_cold_thysanoessa.fasta = the aligned and filtered sequences after removing cold-associated Thysanoessa species</r:Content>
    </r:Abstract>
    <!-- Coverage of the study unit. Only topical coverage is recorded: each subject is given
         in English and Swedish, with controlledVocabularyID holding the code from the
         vocabulary named in controlledVocabularyName.
         No SpatialCoverage element is emitted because the record has no spatial information;
         an empty element would carry neither identification nor content. -->
    <r:Coverage>
      <r:TopicalCoverage>
        <r:URN>urn:ddi:se.researchdata:doi-10-17044-scilifelab-24039510.TopicalCoverage:2.0</r:URN>
        <r:Subject xml:lang="en" controlledVocabularyID="3" controlledVocabularyName="Standard för svensk indelning av forskningsämnen 2025">Medical and Health Sciences</r:Subject>
        <r:Subject xml:lang="sv" controlledVocabularyID="3" controlledVocabularyName="Standard för svensk indelning av forskningsämnen 2025">Medicin och hälsovetenskap</r:Subject>
        <r:Subject xml:lang="en" controlledVocabularyID="10611" controlledVocabularyName="Standard för svensk indelning av forskningsämnen 2025">Ecology</r:Subject>
        <r:Subject xml:lang="sv" controlledVocabularyID="10611" controlledVocabularyName="Standard för svensk indelning av forskningsämnen 2025">Ekologi</r:Subject>
      </r:TopicalCoverage>
    </r:Coverage>
    <!-- Archive-specific information for the study unit: one Item with its access
         description and the number of data files. -->
    <a:Archive>
      <r:URN>urn:ddi:se.researchdata:doi-10-17044-scilifelab-24039510.Archive:2.0</r:URN>
      <a:ArchiveSpecific>
        <a:Item>
          <a:Access>
            <r:URN>urn:ddi:se.researchdata:doi-10-17044-scilifelab-24039510.Archive-ArchiveSpecificType-AccessType:2.0</r:URN>
            <!-- NOTE(review): TypeOfAccess names its vocabulary but has no value; confirm the
                 intended access term and fill it in, or omit the element. -->
            <a:TypeOfAccess controlledVocabularyName="info:eu-repo-Access-Terms vocabulary"></a:TypeOfAccess>
          </a:Access>
          <!-- NOTE(review): a quantity of 0 looks inconsistent with the abstract, which describes
               an archive file; verify against the source system. -->
          <a:DataFileQuantity>0</a:DataFileQuantity>
        </a:Item>
      </a:ArchiveSpecific>
    </a:Archive>
  </s:StudyUnit>
</ddi:DDIInstance>