<ddi:DDIInstance xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="ddi:instance:3_3 http://ddialliance.org/Specification/DDI-Lifecycle/3.3/XMLSchema/instance.xsd" xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:ddi="ddi:instance:3_3" xmlns:r="ddi:reusable:3_3" xmlns:s="ddi:studyunit:3_3" xmlns:d="ddi:datacollection:3_3" xmlns:a="ddi:archive:3_3" xmlns:c="ddi:conceptualcomponent:3_3" xmlns:cm="ddi:comparative:3_3" xmlns:g="ddi:group:3_3" xmlns:l="ddi:logicalproduct:3_3" xmlns:p="ddi:physicaldataproduct:3_3" xmlns:pi="ddi:physicalinstance:3_3" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:xhtml="http://www.w3.org/1999/xhtml" xmlns:xml="http://www.w3.org/XML/1998/namespace" isMaintainable="true" scopeOfUniqueness="Agency">
  <r:URN>urn:ddi:se.researchdata:doi-10-5281-zenodo-7506220:0</r:URN>
  <r:Agency>SND</r:Agency>
  <r:ID>doi-10-5281-zenodo-7506220</r:ID>
  <r:Version>0</r:Version>
  <g:ResourcePackage>
    <r:URN>urn:ddi:se.researchdata:doi-10-5281-zenodo-7506220.ResourcePackage:2.0</r:URN>
    <r:OtherMaterialScheme>
      <r:URN>urn:ddi:se.researchdata:doi-10-5281-zenodo-7506220.OtherMaterialScheme:2.0</r:URN>
    </r:OtherMaterialScheme>
    <a:OrganizationScheme>
      <r:URN>urn:ddi:se.researchdata:doi-10-5281-zenodo-7506220.OrganizationScheme-0:2.0</r:URN>
      <a:Individual>
        <r:URN>urn:ddi:se.researchdata:doi-10-5281-zenodo-7506220.Individual-0:2.0</r:URN>
        <r:UserAttributePair>
          <r:AttributeKey>affiliation</r:AttributeKey>
          <r:AttributeValue>Department of Linguistics, Stockholm University</r:AttributeValue>
        </r:UserAttributePair>
        <a:IndividualIdentification>
          <a:IndividualName>
            <a:FirstGiven>Robert</a:FirstGiven>
            <a:LastFamily>Östling</a:LastFamily>
            <a:FullName>
              <r:String>Östling, Robert</r:String>
            </a:FullName>
          </a:IndividualName>
          <a:ResearcherID>
            <a:TypeOfID>ORCID</a:TypeOfID>
            <a:ResearcherIdentification>0000-0002-6027-4156</a:ResearcherIdentification>
          </a:ResearcherID>
        </a:IndividualIdentification>
      </a:Individual>
      <a:Individual>
        <r:URN>urn:ddi:se.researchdata:doi-10-5281-zenodo-7506220.Individual-0:2.0</r:URN>
        <r:UserAttributePair>
          <r:AttributeKey>affiliation</r:AttributeKey>
          <r:AttributeValue>Department of Linguistics, Stockholm University</r:AttributeValue>
        </r:UserAttributePair>
        <a:IndividualIdentification>
          <a:IndividualName>
            <a:FirstGiven>Murathan</a:FirstGiven>
            <a:LastFamily>Kurfalı</a:LastFamily>
            <a:FullName>
              <r:String>Kurfalı, Murathan</r:String>
            </a:FullName>
          </a:IndividualName>
          <a:ResearcherID>
            <a:TypeOfID>ORCID</a:TypeOfID>
            <a:ResearcherIdentification>0000-0002-7020-8275</a:ResearcherIdentification>
          </a:ResearcherID>
        </a:IndividualIdentification>
      </a:Individual>
    </a:OrganizationScheme>
  </g:ResourcePackage>
  <s:StudyUnit>
    <r:URN>urn:ddi:se.researchdata:doi-10-5281-zenodo-7506220.StudyUnit:2.0</r:URN>
    <r:UserID typeOfUserID="datasetIdentifier">doi-10-5281-zenodo-7506220</r:UserID>
    <r:Citation>
      <r:Title>
        <r:String xml:lang="en">Parallel text typology dataset</r:String>
      </r:Title>
      <r:Creator>
        <r:CreatorReference>
          <r:URN>urn:ddi:se.researchdata:doi-10-5281-zenodo-7506220.Individual-0:2.0</r:URN>
          <r:TypeOfObject>Individual</r:TypeOfObject>
        </r:CreatorReference>
      </r:Creator>
      <r:Publisher>
        <r:PublisherName>
          <r:String xml:lang="sv">Stockholms universitet</r:String>
          <r:String xml:lang="en">Stockholm University</r:String>
        </r:PublisherName>
      </r:Publisher>
      <r:Publisher>
        <r:PublisherName>
          <r:String xml:lang="sv">Stockholms universitet</r:String>
          <r:String xml:lang="en">Stockholm University</r:String>
        </r:PublisherName>
      </r:Publisher>
      <r:PublicationDate>
        <r:SimpleDate>2023-01-05</r:SimpleDate>
      </r:PublicationDate>
      <r:InternationalIdentifier>
        <r:IdentifierContent>10.5281/zenodo.7506220</r:IdentifierContent>
        <r:ManagingAgency controlledVocabularyAgencyName="DOI">DOI</r:ManagingAgency>
      </r:InternationalIdentifier>
    </r:Citation>
    <r:Abstract>
      <r:Content xml:lang="en">This repository contains data accompanying the following paper:

Neural models can sometimes discover typological generalizations. Computational Linguistics (2023) 49 (4): 1003–1051. https://doi.org/10.1162/coli_a_00491

It contains the following information for 1295 different languages:

language vector representations from a range of neural models

automatically derived lists of affixes

automatically derived lists of inflectional paradigms

typological features derived from annotation projection, and statistics on dependency relations

typological features derived from classifiers trained on language vectors and typological databases

automatically derived word lists

data needed for automatic evaluation of language representations (code in separate repository)

Note that the multilingual word embeddings described in the paper are very large, and therefore distributed in a separate public repository.

The computations were enabled by resources provided by the Swedish National Infrastructure for Computing (SNIC) at C3SE partially funded by the Swedish Research Council through grant agreement no. 2018-05973. This work was funded in part by the Swedish Research Council through grant agreement no. 2019-04129.</r:Content>
    </r:Abstract>
    <r:Coverage>
      <r:TopicalCoverage>
        <r:URN>urn:ddi:se.researchdata:doi-10-5281-zenodo-7506220.TopicalCoverage:2.0</r:URN>
        <r:Subject xml:lang="en" controlledVocabularyID="10208" controlledVocabularyName="Standard för svensk indelning av forskningsämnen 2025">Natural Language Processing</r:Subject>
        <r:Subject xml:lang="sv" controlledVocabularyID="10208" controlledVocabularyName="Standard för svensk indelning av forskningsämnen 2025">Språkbehandling och datorlingvistik</r:Subject>
      </r:TopicalCoverage>
      <r:SpatialCoverage />
    </r:Coverage>
    <a:Archive>
      <r:URN>urn:ddi:se.researchdata:doi-10-5281-zenodo-7506220.Archive:2.0</r:URN>
      <a:ArchiveSpecific>
        <a:Item>
          <a:Access>
            <r:URN>urn:ddi:se.researchdata:doi-10-5281-zenodo-7506220.Archive-ArchiveSpecificType-AccessType:2.0</r:URN>
            <a:TypeOfAccess controlledVocabularyName="info:eu-repo-Access-Terms vocabulary">openAccess</a:TypeOfAccess>
          </a:Access>
          <a:DataFileQuantity>0</a:DataFileQuantity>
        </a:Item>
      </a:ArchiveSpecific>
    </a:Archive>
  </s:StudyUnit>
</ddi:DDIInstance>