<ddi:DDIInstance xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="ddi:instance:3_3 http://ddialliance.org/Specification/DDI-Lifecycle/3.3/XMLSchema/instance.xsd" xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:ddi="ddi:instance:3_3" xmlns:r="ddi:reusable:3_3" xmlns:s="ddi:studyunit:3_3" xmlns:d="ddi:datacollection:3_3" xmlns:a="ddi:archive:3_3" xmlns:c="ddi:conceptualcomponent:3_3" xmlns:cm="ddi:comparative:3_3" xmlns:g="ddi:group:3_3" xmlns:l="ddi:logicalproduct:3_3" xmlns:p="ddi:physicaldataproduct:3_3" xmlns:pi="ddi:physicalinstance:3_3" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:xhtml="http://www.w3.org/1999/xhtml" xmlns:xml="http://www.w3.org/XML/1998/namespace" isMaintainable="true" scopeOfUniqueness="Agency">
  <r:URN>urn:ddi:se.researchdata:2024-128:1</r:URN>
  <r:Agency>SND</r:Agency>
  <r:ID>2024-128</r:ID>
  <r:Version>1</r:Version>
  <!-- Reusable resources for dataset 2024-128: an OtherMaterialScheme that carries only its URN, and an
       OrganizationScheme listing four individuals with their affiliation and ORCID. -->
  <g:ResourcePackage>
    <r:URN>urn:ddi:se.researchdata:2024-128.ResourcePackage:2.0</r:URN>
    <r:OtherMaterialScheme>
      <r:URN>urn:ddi:se.researchdata:2024-128.OtherMaterialScheme:2.0</r:URN>
    </r:OtherMaterialScheme>
    <a:OrganizationScheme>
      <r:URN>urn:ddi:se.researchdata:2024-128.OrganizationScheme-0:2.0</r:URN>
      <!-- Every Individual carries its own index-suffixed URN (Individual-0 to Individual-3) so that a
           reference by URN resolves to exactly one person. The r:CreatorReference inside the StudyUnit
           citation points at Individual-0.
           NOTE(review): confirm whether the other individuals should also be referenced as creators. -->
      <a:Individual>
        <r:URN>urn:ddi:se.researchdata:2024-128.Individual-0:2.0</r:URN>
        <r:UserAttributePair>
          <r:AttributeKey>affiliation</r:AttributeKey>
          <r:AttributeValue>Department of Medicine, Huddinge / Center for Hematology and Regenerative Medicine (HERM), Karolinska Institutet</r:AttributeValue>
        </r:UserAttributePair>
        <a:IndividualIdentification>
          <a:IndividualName>
            <a:FirstGiven>Jonas</a:FirstGiven>
            <a:LastFamily>Thier</a:LastFamily>
            <a:FullName>
              <r:String>Jonas Thier</r:String>
            </a:FullName>
          </a:IndividualName>
          <a:ResearcherID>
            <a:TypeOfID>ORCID</a:TypeOfID>
            <a:ResearcherIdentification>0009-0007-5226-0372</a:ResearcherIdentification>
          </a:ResearcherID>
        </a:IndividualIdentification>
      </a:Individual>
      <a:Individual>
        <r:URN>urn:ddi:se.researchdata:2024-128.Individual-1:2.0</r:URN>
        <r:UserAttributePair>
          <r:AttributeKey>affiliation</r:AttributeKey>
          <r:AttributeValue>Department of Medicine, Huddinge / Center for Hematology and Regenerative Medicine (HERM), Karolinska Institutet</r:AttributeValue>
        </r:UserAttributePair>
        <a:IndividualIdentification>
          <a:IndividualName>
            <a:FirstGiven>Eva</a:FirstGiven>
            <a:LastFamily>Hellström-Lindberg</a:LastFamily>
            <a:FullName>
              <r:String>Eva Hellström-Lindberg</r:String>
            </a:FullName>
          </a:IndividualName>
          <a:ResearcherID>
            <a:TypeOfID>ORCID</a:TypeOfID>
            <a:ResearcherIdentification>0000-0002-7839-3743</a:ResearcherIdentification>
          </a:ResearcherID>
        </a:IndividualIdentification>
      </a:Individual>
      <a:Individual>
        <r:URN>urn:ddi:se.researchdata:2024-128.Individual-2:2.0</r:URN>
        <r:UserAttributePair>
          <r:AttributeKey>affiliation</r:AttributeKey>
          <r:AttributeValue>Department of Medicine, Huddinge / Center for Hematology and Regenerative Medicine (HERM), Karolinska Institutet</r:AttributeValue>
        </r:UserAttributePair>
        <a:IndividualIdentification>
          <a:IndividualName>
            <a:FirstGiven>Pedro</a:FirstGiven>
            <a:LastFamily>Moura</a:LastFamily>
            <a:FullName>
              <r:String>Pedro Moura</r:String>
            </a:FullName>
          </a:IndividualName>
          <a:ResearcherID>
            <a:TypeOfID>ORCID</a:TypeOfID>
            <a:ResearcherIdentification>0000-0002-0493-5394</a:ResearcherIdentification>
          </a:ResearcherID>
        </a:IndividualIdentification>
      </a:Individual>
      <a:Individual>
        <r:URN>urn:ddi:se.researchdata:2024-128.Individual-3:2.0</r:URN>
        <r:UserAttributePair>
          <r:AttributeKey>affiliation</r:AttributeKey>
          <r:AttributeValue>Department of Medicine, Huddinge / Center for Hematology and Regenerative Medicine (HERM), Karolinska Institutet</r:AttributeValue>
        </r:UserAttributePair>
        <a:IndividualIdentification>
          <a:IndividualName>
            <a:FirstGiven>Vanessa</a:FirstGiven>
            <a:LastFamily>Lundin</a:LastFamily>
            <a:FullName>
              <r:String>Vanessa Lundin</r:String>
            </a:FullName>
          </a:IndividualName>
          <a:ResearcherID>
            <a:TypeOfID>ORCID</a:TypeOfID>
            <a:ResearcherIdentification>0000-0003-2335-3370</a:ResearcherIdentification>
          </a:ResearcherID>
        </a:IndividualIdentification>
      </a:Individual>
    </a:OrganizationScheme>
  </g:ResourcePackage>
  <s:StudyUnit>
    <r:URN>urn:ddi:se.researchdata:2024-128.StudyUnit:2.0</r:URN>
    <r:UserID typeOfUserID="datasetIdentifier">2024-128</r:UserID>
    <!-- Bibliographic citation of the dataset: bilingual title (sv/en), creator reference, publisher,
         publication date and the DOI under which the dataset is identified. -->
    <r:Citation>
      <r:Title>
        <r:String xml:lang="sv">RNA sekvensering av erytroblaster från inducerade pluripotenta stamcellslinjer (iPSC) med vildtyp och muterad SF3B1</r:String>
        <r:String xml:lang="en">Bulk RNA sequencing of erythroblasts from a pair of SF3B1-mutated and SF3B1-wildtype induced pluripotent stem cell (iPSC) lines</r:String>
      </r:Title>
      <!-- The creator is given by URN reference to an a:Individual declared in the ResourcePackage's
           OrganizationScheme rather than inline. -->
      <r:Creator>
        <r:CreatorReference>
          <r:URN>urn:ddi:se.researchdata:2024-128.Individual-0:2.0</r:URN>
          <r:TypeOfObject>Individual</r:TypeOfObject>
        </r:CreatorReference>
      </r:Creator>
      <!-- The publisher is listed once; the name is the same in Swedish and English. -->
      <r:Publisher>
        <r:PublisherName>
          <r:String xml:lang="sv">Karolinska Institutet</r:String>
          <r:String xml:lang="en">Karolinska Institutet</r:String>
        </r:PublisherName>
      </r:Publisher>
      <r:PublicationDate>
        <r:SimpleDate>2025-08-12</r:SimpleDate>
      </r:PublicationDate>
      <!-- Persistent identifier; the managing agency value marks it as a DOI. -->
      <r:InternationalIdentifier>
        <r:IdentifierContent>10.48723/3hs1-0v44</r:IdentifierContent>
        <r:ManagingAgency controlledVocabularyAgencyName="DOI">DOI</r:ManagingAgency>
      </r:InternationalIdentifier>
    </r:Citation>
    <r:Abstract>
      <r:Content xml:lang="sv">Syftet med denna datainsamling var att bedöma hur SF3B1-mutation förändrar den molekylära profilen för RNA-splitsning vid erytropoes. Denna datauppsättning består av RNA-sekvensering av erytroblaster från ett par av SF3B1-muterade och SF3B1-vildtyp inducerade pluripotenta stamcellslinjer (iPSC). Se den engelska beskrivningen för mer information.</r:Content>
      <r:Content xml:lang="en">This dataset consists of bulk RNA sequencing data of MACS-separated GPA+ erythroblasts obtained from a pair of induced pluripotent stem cell (iPSC) lines with and without SF3B1-mutation, generated from an MDS patient (Asimomitis G et al. 2022, Blood Advances). The objective of this data collection was to assess how SF3B1 mutation changes the molecular profile of RNA splicing in erythropoiesis.

This dataset includes minimally processed, visualisation-ready .bam format sequencing data for both of the lines.
 
Processing: 
MDS patient iPSC line-derived hematopoietic stem and progenitor cells (HSPC) were cultured for 14 days in erythroid specification media (StemPro-34 SFM [Gibco] + 1% Pen/Strep [Cytiva], 2 mM L-glutamine [Sigma-Aldrich], 3.5 µM 1-Thioglycerol [Sigma-Aldrich], 1% Bovine Albumin Fraction V [Gibco], 150 µg/mL holo-transferrin [Sigma-Aldrich], 2 U/mL erythropoietin [Pfizer], 50 ng/mL Stem Cell Factor [PeproTech] and 50 ng/mL interleukin-3 [PeproTech]).
At Day 14 of culture, mixed glycophorin A-positive (GPA+) erythroblast samples were isolated through MACS. Cells were lysed in RLT (Qiagen) + 40 mM dithiothreitol (Sigma-Aldrich) and RNA extraction was performed with RNeasy Micro Kit (Qiagen) with RNase-free DNase treatment according to the manufacturer’s protocol. RNA integrity numbers (RIN) were estimated using Agilent RNA 6000 Pico Kits (Agilent Technologies, CA, USA). A minimum RIN value of 6.5 was considered adequate. RNA sequencing (RNAseq) libraries were prepared from total RNA using SMARTer Stranded Total RNA-Seq Kits v2 - Pico Input Mammalian (Takara Bio, Japan), including enzymatic ribosomal depletion steps. Libraries were sequenced using an Illumina Novaseq 6000 S4 (Illumina, CA, USA) with paired-end 150bp configuration. Reads were pre-processed with TrimGalore v. 0.6.7 using CutAdapt v. 3.5 and BAM files were generated via two-pass alignment with STAR v. 2.7.9a against the GRCh38.p13 human genome assembly.

The dataset consists of 13 files: 
- 2 .bam files, one for the SF3B1-mutant sample and one for the wildtype sample;
- 2 .bai bam index files, one for each sample to facilitate analysis of the .bam files.
- 8 .fastq raw data files, corresponding to a paired-end run of the two samples in two different lanes (2 x 2 x 2).
- 1 gene-collapsed read count matrix (.txt) summarising read counts for both samples.

The documentation file iPSCEB_FileList.txt contains a full list of the files in the dataset.
The total size of the dataset is approximately 80 GB.</r:Content>
    </r:Abstract>
    <!-- Coverage of the study: topical coverage (research subjects and keywords, each given in English
         and Swedish) followed by an empty spatial coverage element. -->
    <r:Coverage>
      <r:TopicalCoverage>
        <r:URN>urn:ddi:se.researchdata:2024-128.TopicalCoverage:2.0</r:URN>

        <!-- Research subjects: one en/sv pair per controlledVocabularyID. -->
        <r:Subject xml:lang="en"
                   controlledVocabularyID="10604"
                   controlledVocabularyName="Standard för svensk indelning av forskningsämnen 2025">Cell Biology</r:Subject>
        <r:Subject xml:lang="sv"
                   controlledVocabularyID="10604"
                   controlledVocabularyName="Standard för svensk indelning av forskningsämnen 2025">Cellbiologi</r:Subject>
        <r:Subject xml:lang="en"
                   controlledVocabularyID="10609"
                   controlledVocabularyName="Standard för svensk indelning av forskningsämnen 2025">Genetics and Genomics</r:Subject>
        <r:Subject xml:lang="sv"
                   controlledVocabularyID="10609"
                   controlledVocabularyName="Standard för svensk indelning av forskningsämnen 2025">Genetik och genomik</r:Subject>
        <r:Subject xml:lang="en"
                   controlledVocabularyID="10610"
                   controlledVocabularyName="Standard för svensk indelning av forskningsämnen 2025">Bioinformatics and Computational Biology</r:Subject>
        <r:Subject xml:lang="sv"
                   controlledVocabularyID="10610"
                   controlledVocabularyName="Standard för svensk indelning av forskningsämnen 2025">Bioinformatik och beräkningsbiologi</r:Subject>
        <r:Subject xml:lang="en"
                   controlledVocabularyID="30202"
                   controlledVocabularyName="Standard för svensk indelning av forskningsämnen 2025">Hematology</r:Subject>
        <r:Subject xml:lang="sv"
                   controlledVocabularyID="30202"
                   controlledVocabularyName="Standard för svensk indelning av forskningsämnen 2025">Hematologi</r:Subject>
        <r:Subject xml:lang="en"
                   controlledVocabularyID="30203"
                   controlledVocabularyName="Standard för svensk indelning av forskningsämnen 2025">Cancer and Oncology</r:Subject>
        <r:Subject xml:lang="sv"
                   controlledVocabularyID="30203"
                   controlledVocabularyName="Standard för svensk indelning av forskningsämnen 2025">Cancer och onkologi</r:Subject>

        <!-- Keywords from the vocabulary named "MeSH": one en/sv pair per controlledVocabularyID. -->
        <r:Keyword xml:lang="en"
                   controlledVocabularyID="D009190"
                   controlledVocabularyName="MeSH">Myelodysplastic Syndromes</r:Keyword>
        <r:Keyword xml:lang="sv"
                   controlledVocabularyID="D009190"
                   controlledVocabularyName="MeSH">Myelodysplastiskt syndrom</r:Keyword>
        <r:Keyword xml:lang="en"
                   controlledVocabularyID="D054437"
                   controlledVocabularyName="MeSH">Myelodysplastic-Myeloproliferative Diseases</r:Keyword>
        <r:Keyword xml:lang="sv"
                   controlledVocabularyID="D054437"
                   controlledVocabularyName="MeSH">Myelodysplastiska-myeloproliferativa syndrom</r:Keyword>
        <r:Keyword xml:lang="en"
                   controlledVocabularyID="D057026"
                   controlledVocabularyName="MeSH">Induced Pluripotent Stem Cells</r:Keyword>
        <r:Keyword xml:lang="sv"
                   controlledVocabularyID="D057026"
                   controlledVocabularyName="MeSH">Inducerade pluripotenta stamceller</r:Keyword>
      </r:TopicalCoverage>
      <!-- No spatial coverage details are recorded; the element is present but empty. -->
      <r:SpatialCoverage/>
    </r:Coverage>
    <!-- Unit of analysis: a coded value followed by bilingual labels describing the units covered.
         NOTE(review): presumably code snd.a01 corresponds to the "Cells" / "Celler" labels below;
         confirm against the controlled vocabulary. -->
    <r:AnalysisUnit controlledVocabularyID="AnalysisUnit"
                    controlledVocabularyAgencyName="DDI Alliance">snd.a01</r:AnalysisUnit>
    <r:AnalysisUnitsCovered>
      <r:String xml:lang="en">Cells</r:String>
      <r:String xml:lang="sv">Celler</r:String>
    </r:AnalysisUnitsCovered>
    <!-- Archive-specific information for the dataset: the access condition and the number of data files. -->
    <a:Archive>
      <r:URN>urn:ddi:se.researchdata:2024-128.Archive:2.0</r:URN>
      <a:ArchiveSpecific>
        <a:Item>
          <!-- Access level, expressed with a term from the vocabulary named in the attribute. -->
          <a:Access>
            <r:URN>urn:ddi:se.researchdata:2024-128.Archive-ArchiveSpecificType-AccessType:2.0</r:URN>
            <a:TypeOfAccess
                controlledVocabularyName="info:eu-repo-Access-Terms vocabulary">restrictedAccess</a:TypeOfAccess>
          </a:Access>
          <!-- NOTE(review): the English abstract lists 13 files in the dataset, while this quantity is 2;
               confirm what this count is meant to cover. -->
          <a:DataFileQuantity>2</a:DataFileQuantity>
        </a:Item>
      </a:ArchiveSpecific>
    </a:Archive>
  </s:StudyUnit>
</ddi:DDIInstance>