<ddi:DDIInstance xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="ddi:instance:3_3 http://ddialliance.org/Specification/DDI-Lifecycle/3.3/XMLSchema/instance.xsd" xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:ddi="ddi:instance:3_3" xmlns:r="ddi:reusable:3_3" xmlns:s="ddi:studyunit:3_3" xmlns:d="ddi:datacollection:3_3" xmlns:a="ddi:archive:3_3" xmlns:c="ddi:conceptualcomponent:3_3" xmlns:cm="ddi:comparative:3_3" xmlns:g="ddi:group:3_3" xmlns:l="ddi:logicalproduct:3_3" xmlns:p="ddi:physicaldataproduct:3_3" xmlns:pi="ddi:physicalinstance:3_3" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:xhtml="http://www.w3.org/1999/xhtml" xmlns:xml="http://www.w3.org/XML/1998/namespace" isMaintainable="true" scopeOfUniqueness="Agency">
  <r:URN>urn:ddi:se.researchdata:doi-10-17044-scilifelab-25908748:0</r:URN>
  <r:Agency>SND</r:Agency>
  <r:ID>doi-10-17044-scilifelab-25908748</r:ID>
  <r:Version>0</r:Version>
  <g:ResourcePackage>
    <r:URN>urn:ddi:se.researchdata:doi-10-17044-scilifelab-25908748.ResourcePackage:2.0</r:URN>
    <r:OtherMaterialScheme>
      <r:URN>urn:ddi:se.researchdata:doi-10-17044-scilifelab-25908748.OtherMaterialScheme:2.0</r:URN>
    </r:OtherMaterialScheme>
    <a:OrganizationScheme>
      <r:URN>urn:ddi:se.researchdata:doi-10-17044-scilifelab-25908748.OrganizationScheme-0:2.0</r:URN>
      <a:Individual>
        <r:URN>urn:ddi:se.researchdata:doi-10-17044-scilifelab-25908748.Individual-0:2.0</r:URN>
        <a:IndividualIdentification>
          <a:IndividualName>
            <a:FirstGiven>Jacob</a:FirstGiven>
            <a:LastFamily>Höglund</a:LastFamily>
            <a:FullName>
              <r:String>Jacob Höglund</r:String>
            </a:FullName>
          </a:IndividualName>
        </a:IndividualIdentification>
      </a:Individual>
      <a:Individual>
        <r:URN>urn:ddi:se.researchdata:doi-10-17044-scilifelab-25908748.Individual-0:2.0</r:URN>
        <a:IndividualIdentification>
          <a:IndividualName>
            <a:FirstGiven>Guilherme</a:FirstGiven>
            <a:LastFamily>Dias</a:LastFamily>
            <a:FullName>
              <r:String>Guilherme Dias</r:String>
            </a:FullName>
          </a:IndividualName>
        </a:IndividualIdentification>
      </a:Individual>
      <a:Individual>
        <r:URN>urn:ddi:se.researchdata:doi-10-17044-scilifelab-25908748.Individual-0:2.0</r:URN>
        <a:IndividualIdentification>
          <a:IndividualName>
            <a:FirstGiven>Remi-André</a:FirstGiven>
            <a:LastFamily>Olsen</a:LastFamily>
            <a:FullName>
              <r:String>Remi-André Olsen</r:String>
            </a:FullName>
          </a:IndividualName>
        </a:IndividualIdentification>
      </a:Individual>
      <a:Individual>
        <r:URN>urn:ddi:se.researchdata:doi-10-17044-scilifelab-25908748.Individual-0:2.0</r:URN>
        <a:IndividualIdentification>
          <a:IndividualName>
            <a:FirstGiven>André</a:FirstGiven>
            <a:LastFamily>Soares</a:LastFamily>
            <a:FullName>
              <r:String>André Soares</r:String>
            </a:FullName>
          </a:IndividualName>
        </a:IndividualIdentification>
      </a:Individual>
      <a:Individual>
        <r:URN>urn:ddi:se.researchdata:doi-10-17044-scilifelab-25908748.Individual-0:2.0</r:URN>
        <a:IndividualIdentification>
          <a:IndividualName>
            <a:FirstGiven>Ignas</a:FirstGiven>
            <a:LastFamily>Bunikis</a:LastFamily>
            <a:FullName>
              <r:String>Ignas Bunikis</r:String>
            </a:FullName>
          </a:IndividualName>
        </a:IndividualIdentification>
      </a:Individual>
      <a:Individual>
        <r:URN>urn:ddi:se.researchdata:doi-10-17044-scilifelab-25908748.Individual-0:2.0</r:URN>
        <a:IndividualIdentification>
          <a:IndividualName>
            <a:FirstGiven>Venkat</a:FirstGiven>
            <a:LastFamily>Talla</a:LastFamily>
            <a:FullName>
              <r:String>Venkat Talla</r:String>
            </a:FullName>
          </a:IndividualName>
        </a:IndividualIdentification>
      </a:Individual>
      <a:Individual>
        <r:URN>urn:ddi:se.researchdata:doi-10-17044-scilifelab-25908748.Individual-0:2.0</r:URN>
        <a:IndividualIdentification>
          <a:IndividualName>
            <a:FirstGiven>Niclas</a:FirstGiven>
            <a:LastFamily>Backström</a:LastFamily>
            <a:FullName>
              <r:String>Niclas Backström</r:String>
            </a:FullName>
          </a:IndividualName>
        </a:IndividualIdentification>
      </a:Individual>
    </a:OrganizationScheme>
  </g:ResourcePackage>
  <s:StudyUnit>
    <r:URN>urn:ddi:se.researchdata:doi-10-17044-scilifelab-25908748.StudyUnit:2.0</r:URN>
    <r:UserID typeOfUserID="datasetIdentifier">doi-10-17044-scilifelab-25908748</r:UserID>
    <r:Citation>
      <r:Title>
        <r:String xml:lang="en">Supplemental data from the genome assembly and annotation of the Clouded Apollo Butterfly (Parnassius mnemosyne)</r:String>
      </r:Title>
      <r:Creator>
        <r:CreatorReference>
          <r:URN>urn:ddi:se.researchdata:doi-10-17044-scilifelab-25908748.Individual-0:2.0</r:URN>
          <r:TypeOfObject>Individual</r:TypeOfObject>
        </r:CreatorReference>
      </r:Creator>
      <r:Publisher>
        <r:PublisherName>
          <r:String xml:lang="sv">Uppsala universitet</r:String>
          <r:String xml:lang="en">Uppsala University</r:String>
        </r:PublisherName>
      </r:Publisher>
      <r:Publisher>
        <r:PublisherName>
          <r:String xml:lang="sv">Uppsala universitet</r:String>
          <r:String xml:lang="en">Uppsala University</r:String>
        </r:PublisherName>
      </r:Publisher>
      <r:PublicationDate>
        <r:SimpleDate>2024-06-26</r:SimpleDate>
      </r:PublicationDate>
      <r:InternationalIdentifier>
        <r:IdentifierContent>10.17044/SCILIFELAB.25908748</r:IdentifierContent>
        <r:ManagingAgency controlledVocabularyAgencyName="DOI">DOI</r:ManagingAgency>
      </r:InternationalIdentifier>
    </r:Citation>
    <r:Abstract>
      <r:Content xml:lang="en">This dataset contains supplementary data from the genome sequencing of the Clouded Apollo Butterfly (Parnassius mnemosyne), published in:

Höglund, J., Dias, G., Olsen, R. A., Soares, A., Bunikis, I., Talla, V., &amp; Backström, N. (2024). A Chromosome-Level Genome Assembly and Annotation for the Clouded Apollo Butterfly (Parnassius mnemosyne): A Species of Global Conservation Concern. Genome Biology and Evolution, 16(2), evae031. https://doi.org/10.1093/gbe/evae031

Previous data from the project has been deposited at the European Nucleotide Archive (ENA) in the umbrella project PRJEB76269 (https://www.ebi.ac.uk/ena/browser/view/PRJEB76269) .

The data contained in this archive at SciLifeLab Data Repository describe the genome assembly (ENA accession: GCA_963668995.1 (https://www.ebi.ac.uk/ena/browser/view/GCA_963668995.1) ), and the mitochondrial genome assembly (ENA accession: OZ075093.1 (https://www.ebi.ac.uk/ena/browser/view/OZ075093.1) ).

Below follows a brief description of each file. The information on the methods used to generate the files was adapted from Höglund et al. 2024.

- pmne_functional_edit1.gff.gz
contains the functional annotation (protein coding genes) of the primary genome assembly (GCA_963668995.1 (https://www.ebi.ac.uk/ena/browser/view/GCA_963668995.1) ). This is the original file that was submitted to ENA. A derived version of the file is available from NCBI; the NCBI version was generated from the EMBL records of each annotated gene and differs in that it, for instance, uses a different naming scheme for the seqid column and the locus tags. The NCBI version is available at this link (https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/963/668/995/GCA_963668995.1_Parnassius_mnemosyne_n_2023_11/GCA_963668995.1_Parnassius_mnemosyne_n_2023_11_genomic.gff.gz) .

The genes were predicted using BRAKER (v3.03), GALBA (v1.0.6), and GeneMarkS-T (v5.1). The resulting gene models were combined and filtered using TSEBRA (version: long_reads branch commit 1f2614). The combined gene model was functionally annotated by the NBIS nextflow pipeline v2.0.0 (https://github.com/NBISweden).

- pmne_Illumina_RNAseq_StringTie_sorted-transcripts_match.gff.gz
contains a transcript assembly of the Illumina RNAseq reads (ENA accession: ERX11559451 (https://www.ebi.ac.uk/ena/browser/view/ERX11559451) ). The reads were aligned to the genome with HiSat2 (v2.1.0) and then assembled with StringTie (v2.2.1).

- pmne_mtdna.gff.gz
contains the functional annotation of the mitochondrial genome assembly (ENA accession: OZ075093.1 (https://www.ebi.ac.uk/ena/browser/view/OZ075093.1) ). This is the original file that was submitted to ENA. The annotation was generated using MitoFinder (v1.4.1).

- pmne_ncRNAs.gff.gz
contains the annotation of putative non-coding RNA (ncRNA) genes. The prediction was done with Infernal (v1.1.4) and the Rfam (v14.1) covariance models.

- pmne_tRNAs_and_pseudogenes.gff.gz
contains the annotation of putative tRNA genes and pseudogenes. The prediction was done with tRNAscan-SE (v2.0.12).

- pmne_PacBio_isoseq.sorted.bam
contains the PacBio IsoSeq transcripts (ENA accession: ERX11559436 (https://www.ebi.ac.uk/ena/browser/view/ERX11559436) ) aligned to the primary genome assembly.

- pmne_repeat_library.fa.gz
contains the nucleotide sequences of the predicted repeats in fasta format. The prediction was done with RepeatModeler2 (v2.0.2a).

Available variables
For a description of the column headers of the files, please see the following links to the documentation of the different file formats.

The GFF3 format (.gff) is described here: https://github.com/The-Sequence-Ontology/Specifications/blob/master/gff3.md

The BAM format (.bam) is a compressed version of the SAM format, both of which are described here: https://samtools.github.io/hts-specs/SAMv1.pdf

The fasta (.fa) format is described here: https://www.ncbi.nlm.nih.gov/genbank/fastaformat/

Contact
For questions about this dataset, please contact:
jacob.hoglund@ebc.uu.se
niclas.backstrom@ebc.uu.se</r:Content>
    </r:Abstract>
    <r:Coverage>
      <r:TopicalCoverage>
        <r:URN>urn:ddi:se.researchdata:doi-10-17044-scilifelab-25908748.TopicalCoverage:2.0</r:URN>
        <r:Subject xml:lang="en" controlledVocabularyID="3" controlledVocabularyName="Standard för svensk indelning av forskningsämnen 2025">Medical and Health Sciences</r:Subject>
        <r:Subject xml:lang="sv" controlledVocabularyID="3" controlledVocabularyName="Standard för svensk indelning av forskningsämnen 2025">Medicin och hälsovetenskap</r:Subject>
        <r:Subject xml:lang="en" controlledVocabularyID="10609" controlledVocabularyName="Standard för svensk indelning av forskningsämnen 2025">Genetics and Genomics</r:Subject>
        <r:Subject xml:lang="sv" controlledVocabularyID="10609" controlledVocabularyName="Standard för svensk indelning av forskningsämnen 2025">Genetik och genomik</r:Subject>
      </r:TopicalCoverage>
      <r:SpatialCoverage />
    </r:Coverage>
    <a:Archive>
      <r:URN>urn:ddi:se.researchdata:doi-10-17044-scilifelab-25908748.Archive:2.0</r:URN>
      <a:ArchiveSpecific>
        <a:Item>
          <a:Access>
            <r:URN>urn:ddi:se.researchdata:doi-10-17044-scilifelab-25908748.Archive-ArchiveSpecificType-AccessType:2.0</r:URN>
            <a:TypeOfAccess controlledVocabularyName="info:eu-repo-Access-Terms vocabulary"></a:TypeOfAccess>
          </a:Access>
          <a:DataFileQuantity>0</a:DataFileQuantity>
        </a:Item>
      </a:ArchiveSpecific>
    </a:Archive>
  </s:StudyUnit>
</ddi:DDIInstance>