<ddi:DDIInstance xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="ddi:instance:3_3 http://ddialliance.org/Specification/DDI-Lifecycle/3.3/XMLSchema/instance.xsd" xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:ddi="ddi:instance:3_3" xmlns:r="ddi:reusable:3_3" xmlns:s="ddi:studyunit:3_3" xmlns:d="ddi:datacollection:3_3" xmlns:a="ddi:archive:3_3" xmlns:c="ddi:conceptualcomponent:3_3" xmlns:cm="ddi:comparative:3_3" xmlns:g="ddi:group:3_3" xmlns:l="ddi:logicalproduct:3_3" xmlns:p="ddi:physicaldataproduct:3_3" xmlns:pi="ddi:physicalinstance:3_3" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:xhtml="http://www.w3.org/1999/xhtml" xmlns:xml="http://www.w3.org/XML/1998/namespace" isMaintainable="true" scopeOfUniqueness="Agency">
  <r:URN>urn:ddi:se.researchdata:2023-257:1</r:URN>
  <r:Agency>SND</r:Agency>
  <r:ID>2023-257</r:ID>
  <r:Version>1</r:Version>
  <g:ResourcePackage>
    <r:URN>urn:ddi:se.researchdata:2023-257.ResourcePackage:2.0</r:URN>
    <!-- Related material: a single entry of type "Dataset" that points to record 2023-246,
         with its title in Swedish and English and an external URL on snd.se. -->
    <r:OtherMaterialScheme>
      <r:URN>urn:ddi:se.researchdata:2023-257.OtherMaterialScheme:2.0</r:URN>
      <r:OtherMaterial>
        <r:URN>urn:ddi:se.researchdata:2023-246:1.1</r:URN>
        <r:TypeOfMaterial>Dataset</r:TypeOfMaterial>
        <r:Citation>
          <r:Title>
            <r:String xml:lang="sv">Dataset med tillståndsövervakningsvibrationsdata annoterat med tekniskt språk, från pappersmaskinsindustri i norra Sverige</r:String>
            <r:String xml:lang="en">Dataset with condition monitoring vibration data annotated with technical language, from paper machine industries in northern Sweden</r:String>
          </r:Title>
        </r:Citation>
        <r:ExternalURLReference>https://snd.se/catalogue/dataset/2023-246</r:ExternalURLReference>
      </r:OtherMaterial>
    </r:OtherMaterialScheme>
    <!-- Person registry: one a:Individual per person, each holding an "affiliation"
         attribute pair, a structured name and an ORCID.
         Each Individual has its own URN (Individual-0 to Individual-3) so that a
         URN-based reference resolves to exactly one person; previously all four
         entries shared the Individual-0 URN.
         NOTE(review): the StudyUnit citation's r:CreatorReference points at
         Individual-0 only. If the other individuals are creators as well, matching
         references need to be added there. -->
    <a:OrganizationScheme>
      <r:URN>urn:ddi:se.researchdata:2023-257.OrganizationScheme-0:2.0</r:URN>
      <a:Individual>
        <r:URN>urn:ddi:se.researchdata:2023-257.Individual-0:2.0</r:URN>
        <r:UserAttributePair>
          <r:AttributeKey>affiliation</r:AttributeKey>
          <r:AttributeValue>Department of Computer Science, Electrical and Space Engineering, Luleå University of Technology</r:AttributeValue>
        </r:UserAttributePair>
        <a:IndividualIdentification>
          <a:IndividualName>
            <a:FirstGiven>Karl</a:FirstGiven>
            <a:LastFamily>Löwenmark</a:LastFamily>
            <a:FullName>
              <r:String>Karl Löwenmark</r:String>
            </a:FullName>
          </a:IndividualName>
          <a:ResearcherID>
            <a:TypeOfID>ORCID</a:TypeOfID>
            <a:ResearcherIdentification>0000-0002-0188-9337</a:ResearcherIdentification>
          </a:ResearcherID>
        </a:IndividualIdentification>
      </a:Individual>
      <a:Individual>
        <r:URN>urn:ddi:se.researchdata:2023-257.Individual-1:2.0</r:URN>
        <r:UserAttributePair>
          <r:AttributeKey>affiliation</r:AttributeKey>
          <r:AttributeValue>Department of Computer Science, Electrical and Space Engineering, Luleå University of Technology</r:AttributeValue>
        </r:UserAttributePair>
        <a:IndividualIdentification>
          <a:IndividualName>
            <a:FirstGiven>Fredrik</a:FirstGiven>
            <a:LastFamily>Sandin</a:LastFamily>
            <a:FullName>
              <r:String>Fredrik Sandin</r:String>
            </a:FullName>
          </a:IndividualName>
          <a:ResearcherID>
            <a:TypeOfID>ORCID</a:TypeOfID>
            <a:ResearcherIdentification>0000-0001-5662-825X</a:ResearcherIdentification>
          </a:ResearcherID>
        </a:IndividualIdentification>
      </a:Individual>
      <a:Individual>
        <r:URN>urn:ddi:se.researchdata:2023-257.Individual-2:2.0</r:URN>
        <r:UserAttributePair>
          <r:AttributeKey>affiliation</r:AttributeKey>
          <r:AttributeValue>Department of Computer Science, Electrical and Space Engineering, Luleå University of Technology</r:AttributeValue>
        </r:UserAttributePair>
        <a:IndividualIdentification>
          <a:IndividualName>
            <a:FirstGiven>Marcus</a:FirstGiven>
            <a:LastFamily>Liwicki</a:LastFamily>
            <a:FullName>
              <r:String>Marcus Liwicki</r:String>
            </a:FullName>
          </a:IndividualName>
          <a:ResearcherID>
            <a:TypeOfID>ORCID</a:TypeOfID>
            <a:ResearcherIdentification>0000-0003-4029-6574</a:ResearcherIdentification>
          </a:ResearcherID>
        </a:IndividualIdentification>
      </a:Individual>
      <a:Individual>
        <r:URN>urn:ddi:se.researchdata:2023-257.Individual-3:2.0</r:URN>
        <r:UserAttributePair>
          <r:AttributeKey>affiliation</r:AttributeKey>
          <r:AttributeValue>SKF (Sweden)</r:AttributeValue>
        </r:UserAttributePair>
        <a:IndividualIdentification>
          <a:IndividualName>
            <a:FirstGiven>Stephan</a:FirstGiven>
            <a:LastFamily>Schnabel</a:LastFamily>
            <a:FullName>
              <r:String>Stephan Schnabel</r:String>
            </a:FullName>
          </a:IndividualName>
          <a:ResearcherID>
            <a:TypeOfID>ORCID</a:TypeOfID>
            <a:ResearcherIdentification>0000-0001-7459-3484</a:ResearcherIdentification>
          </a:ResearcherID>
        </a:IndividualIdentification>
      </a:Individual>
    </a:OrganizationScheme>
  </g:ResourcePackage>
  <s:StudyUnit>
    <r:URN>urn:ddi:se.researchdata:2023-257.StudyUnit:2.0</r:URN>
    <r:UserID typeOfUserID="datasetIdentifier">2023-257</r:UserID>
    <r:UserID typeOfUserID="principalRegistrationNumber">2019-02533</r:UserID>
    <r:Citation>
      <r:Title>
        <r:String xml:lang="sv">Dataset med annoteringar av tekniskt språk från fyra års tillståndsövervakning av pappersmaskinsindustri i norra Sverige</r:String>
        <r:String xml:lang="en">Dataset with four years of condition monitoring technical language annotations from paper machine industries in northern Sweden</r:String>
      </r:Title>
      <r:Creator>
        <r:CreatorReference>
          <r:URN>urn:ddi:se.researchdata:2023-257.Individual-0:2.0</r:URN>
          <r:TypeOfObject>Individual</r:TypeOfObject>
        </r:CreatorReference>
      </r:Creator>
      <r:Publisher>
        <r:PublisherName>
          <r:String xml:lang="sv">Luleå tekniska universitet</r:String>
          <r:String xml:lang="en">Luleå University of Technology</r:String>
        </r:PublisherName>
      </r:Publisher>
      <r:Publisher>
        <r:PublisherName>
          <r:String xml:lang="sv">Luleå tekniska universitet</r:String>
          <r:String xml:lang="en">Luleå University of Technology</r:String>
        </r:PublisherName>
      </r:Publisher>
      <r:PublicationDate>
        <r:SimpleDate>2023-12-21</r:SimpleDate>
      </r:PublicationDate>
      <r:InternationalIdentifier>
        <r:IdentifierContent>10.5878/hafd-ms27</r:IdentifierContent>
        <r:ManagingAgency controlledVocabularyAgencyName="DOI">DOI</r:ManagingAgency>
      </r:InternationalIdentifier>
    </r:Citation>
    <r:Abstract>
      <r:Content xml:lang="sv">Detta dataset består av tekniskt-språk-annoteringar från fyra års insamling från två pappersmaskiner i norra Sverige, strukturerat som en Pandas dataframe. Samma data finns också tillgänglig som en semikolonseparerad .csv-fil. Datan består av två kolumner, där den första kolumnen motsvarar annoteringens textinnehåll, och den andra titeln. Annoteringarna är skrivna på svenska, och processade så att alla egennamn ersatts av textsträngen ’egennamn’. Varje rad motsvarar en annotering med titel.

Data behandlas i Python med:
import pandas as pd
annotations_df = pd.read_pickle("Technical_Language_Annotations.pkl")
annotation_contents = annotations_df['noteComment']
annotation_titles = annotations_df['title']</r:Content>
      <r:Content xml:lang="en">This dataset consists of four years of technical language annotations from two paper machines in northern Sweden, structured as a Pandas dataframe. The same data is also available as a semicolon-separated .csv file. The data consists of two columns, where the first column corresponds to annotation note contents, and the second column corresponds to annotation titles. The annotations are in Swedish, and processed so that all mentions of personal information are replaced with the string ‘egennamn’, meaning “personal name” in Swedish. Each row corresponds to one annotation with the corresponding title.

Data can be accessed in Python with:
import pandas as pd
annotations_df = pd.read_pickle("Technical_Language_Annotations.pkl")
annotation_contents = annotations_df['noteComment']
annotation_titles = annotations_df['title']</r:Content>
    </r:Abstract>
    <!-- Coverage of the study: topical (subjects and keywords), spatial and temporal. -->
    <r:Coverage>
      <!-- Subjects and keywords are listed per language (xml:lang); the en/sv variants of one
           term share the same controlledVocabularyID. The two NASA Thesaurus keywords are
           given in English only. -->
      <r:TopicalCoverage>
        <r:URN>urn:ddi:se.researchdata:2023-257.TopicalCoverage:2.0</r:URN>
        <r:Subject xml:lang="en" controlledVocabularyID="10208" controlledVocabularyName="Standard för svensk indelning av forskningsämnen 2025">Natural Language Processing</r:Subject>
        <r:Subject xml:lang="sv" controlledVocabularyID="10208" controlledVocabularyName="Standard för svensk indelning av forskningsämnen 2025">Språkbehandling och datorlingvistik</r:Subject>
        <r:Keyword xml:lang="en" controlledVocabularyID="p10598" controlledVocabularyName="YSO">paper machines</r:Keyword>
        <r:Keyword xml:lang="sv" controlledVocabularyID="p10598" controlledVocabularyName="YSO">pappersmaskiner</r:Keyword>
        <r:Keyword xml:lang="en" controlledVocabularyID="p15423" controlledVocabularyName="YSO">condition monitoring</r:Keyword>
        <r:Keyword xml:lang="sv" controlledVocabularyID="p15423" controlledVocabularyName="YSO">övervakning av skick</r:Keyword>
        <r:Keyword xml:lang="en" controlledVocabularyID="p6071" controlledVocabularyName="YSO">language technology</r:Keyword>
        <r:Keyword xml:lang="sv" controlledVocabularyID="p6071" controlledVocabularyName="YSO">språkteknologi</r:Keyword>
        <r:Keyword xml:lang="en" controlledVocabularyID="p12266" controlledVocabularyName="YSO">signal processing</r:Keyword>
        <r:Keyword xml:lang="sv" controlledVocabularyID="p12266" controlledVocabularyName="YSO">signalbehandling</r:Keyword>
        <r:Keyword xml:lang="en" controlledVocabularyID="p10706" controlledVocabularyName="YSO">paper industry</r:Keyword>
        <r:Keyword xml:lang="sv" controlledVocabularyID="p10706" controlledVocabularyName="YSO">pappersindustri</r:Keyword>
        <r:Keyword xml:lang="en" controlledVocabularyID="43098" controlledVocabularyName="NASA Thesaurus">fault detection</r:Keyword>
        <r:Keyword xml:lang="en" controlledVocabularyID="47942" controlledVocabularyName="NASA Thesaurus">natural language processing</r:Keyword>
      </r:TopicalCoverage>
      <!-- Spatial coverage: a free-text region description (English only) plus a country code. -->
      <r:SpatialCoverage>
        <r:URN>urn:ddi:se.researchdata:2023-257.SpatialCoverage:2.0</r:URN>
        <r:Description>
          <r:Content xml:lang="en">Northern Sweden</r:Content>
        </r:Description>
        <r:CountryCode>SE</r:CountryCode>
      </r:SpatialCoverage>
      <!-- Temporal coverage: reference period given at year precision. -->
      <r:TemporalCoverage>
        <r:URN>urn:ddi:se.researchdata:2023-257.TemporalCoverage:2.0</r:URN>
        <r:ReferenceDate>
          <r:StartDate>2018</r:StartDate>
          <r:EndDate>2022</r:EndDate>
        </r:ReferenceDate>
      </r:TemporalCoverage>
    </r:Coverage>
    <!-- Archive information: access level of the dataset and the declared number of data files.
         NOTE(review): DataFileQuantity is 1, while the abstract mentions both a .pkl file and a
         semicolon-separated .csv file; confirm the count against the deposited files. -->
    <a:Archive>
      <r:URN>urn:ddi:se.researchdata:2023-257.Archive:2.0</r:URN>
      <a:ArchiveSpecific>
        <a:Item>
          <a:Access>
            <r:URN>urn:ddi:se.researchdata:2023-257.Archive-ArchiveSpecificType-AccessType:2.0</r:URN>
            <!-- Access term taken from the vocabulary named in controlledVocabularyName. -->
            <a:TypeOfAccess controlledVocabularyName="info:eu-repo-Access-Terms vocabulary">restrictedAccess</a:TypeOfAccess>
          </a:Access>
          <a:DataFileQuantity>1</a:DataFileQuantity>
        </a:Item>
      </a:ArchiveSpecific>
    </a:Archive>
  </s:StudyUnit>
</ddi:DDIInstance>