HTTP 200 OK
Allow: GET, POST, OPTIONS
Content-Type: application/json
Vary: Accept
{
"count": 5025,
"next": "https://metax.demo.fairdata.fi/v3/datasets?format=api&limit=20&offset=5020",
"previous": "https://metax.demo.fairdata.fi/v3/datasets?format=api&limit=20&offset=4980",
"results": [
{
"id": "207f1e82-c73f-46ef-9462-435f4dc55c2c",
"access_rights": {
"id": "532cce3b-c05d-4235-864c-dec3a7cb1bda",
"license": [
{
"id": "64b8be74-3aac-4b1d-8b0a-1b8d3c15a9e9",
"url": "http://uri.suomi.fi/codelist/fairdata/license/code/CC-BY-1.0",
"in_scheme": "http://uri.suomi.fi/codelist/fairdata/license",
"pref_label": {
"en": "Creative Commons Attribution 1.0 Generic (CC BY 1.0)",
"fi": "Creative Commons Nimeä 1.0 Yleinen (CC BY 1.0)"
}
}
],
"access_type": {
"id": "b41462f7-00bf-4e50-935b-2bf1184453a7",
"url": "http://uri.suomi.fi/codelist/fairdata/access_type/code/open",
"in_scheme": "http://uri.suomi.fi/codelist/fairdata/access_type",
"pref_label": {
"en": "Open",
"fi": "Avoin"
}
},
"restriction_grounds": []
},
"actors": [
{
"id": "61d8fd85-9f7d-4ee4-9d5e-dad3852a77ad",
"roles": [
"creator",
"rights_holder"
],
"organization": {
"id": "5b171a59-e5b4-4cd3-96cc-4c38b451f0d4",
"pref_label": {
"en": "Prime Minister's Office"
},
"homepage": {
"url": "http://vnk.fi/en/frontpage"
},
"email": "<hidden>"
}
},
{
"id": "371c8d96-b189-4cd4-8f26-6e3afd61dd6c",
"roles": [
"curator"
],
"person": {
"id": "0171294f-c2af-405b-8b0d-b7b02afabe68",
"name": "User support FIN-CLARIN",
"email": "<hidden>"
},
"organization": {
"id": "a5f4935c-4c0e-44f6-a7d6-71b946913649",
"pref_label": {
"en": "University of Helsinki",
"fi": "Helsingin yliopisto",
"sv": "Helsingfors universitet",
"und": "Helsingin yliopisto"
},
"url": "http://uri.suomi.fi/codelist/fairdata/organization/code/01901",
"in_scheme": "http://uri.suomi.fi/codelist/fairdata/organization"
}
},
{
"id": "0e52a03b-c3d3-45ab-bc0b-8a784000c059",
"roles": [
"publisher"
],
"organization": {
"id": "15d05bfb-b7a0-4de1-8fbb-c9cd9115b348",
"pref_label": {
"en": "Multiple publishers, check distribution rights holders in original metadata by following its persistent identifier"
}
}
}
],
"cumulative_state": 0,
"data_catalog": "urn:nbn:fi:att:data-catalog-harvest-kielipankki",
"description": {
"en": "The \"Hallituskausi 2011–2015\" translation memory is intended for those translating administrative texts between Finnish and English. It includes key policy reports published by the Finnish ministries on their websites during the ongoing electoral period. The memory features some 11,000 Finnish-to-English translation segments.\n\nThe translation memory runs in a SDL Trados Studio programme. \n\nThe translation segments may contain errors due to the technical conversion process (for example, additional or missing hyphens which could affect the search function). \n\nThe translation memory is for reference use only. Use of the memory calls for case-by-case consideration and it must never be applied as such.\n\nThe Prime Minister’s Office is not responsible for any translation or other errors in the translation memory.\n\nThe resource is available for download at https://kielipankki.fi/download/"
},
"field_of_science": [
{
"id": "f7ee5ba6-f6af-48d0-bb0f-ea26851256d2",
"url": "http://www.yso.fi/onto/okm-tieteenala/ta6121",
"in_scheme": "http://www.yso.fi/onto/okm-tieteenala/conceptscheme",
"pref_label": {
"en": "Languages",
"fi": "Kielitieteet",
"sv": "Språkvetenskaper"
}
}
],
"infrastructure": [],
"keyword": [],
"language": [
{
"id": "b0c7eada-5b22-48b8-86c2-16e996ca2681",
"url": "http://lexvo.org/id/iso639-3/fin",
"in_scheme": "http://lexvo.org/id/",
"pref_label": {
"en": "Finnish",
"fi": "suomi",
"sv": "finska"
}
},
{
"id": "8f8d38be-f5ee-490f-a33a-c8ddceef9785",
"url": "http://lexvo.org/id/iso639-3/eng",
"in_scheme": "http://lexvo.org/id/",
"pref_label": {
"en": "English",
"fi": "englanti",
"sv": "engelska"
}
}
],
"metadata_owner": {
"id": "003e43bb-cfd5-43a5-92c8-1a6cbec7f76c",
"organization": "service_kielipankki"
},
"other_identifiers": [],
"persistent_identifier": "urn:nbn:fi:lb-2017090403",
"pid_generated_by_fairdata": false,
"projects": [],
"provenance": [],
"relation": [],
"remote_resources": [],
"spatial": [],
"state": "published",
"temporal": [],
"theme": [],
"title": {
"en": "The \"Hallituskausi 2011–2015\" Translation Memory"
},
"created": "2022-11-17T00:00:00Z",
"modified": "2024-12-04T13:06:39Z",
"dataset_versions": [
{
"id": "207f1e82-c73f-46ef-9462-435f4dc55c2c",
"title": {
"en": "The \"Hallituskausi 2011–2015\" Translation Memory"
},
"persistent_identifier": "urn:nbn:fi:lb-2017090403",
"state": "published",
"created": "2022-11-17T00:00:00Z",
"version": 1
}
],
"published_revision": 14,
"version": 1,
"api_version": 3,
"metadata_repository": "Fairdata"
},
{
"id": "2d3fcfc9-f17c-4777-b2f1-7e88bb747a34",
"access_rights": {
"id": "08d7c3d4-99d9-4e2f-a691-ab9e71123100",
"license": [
{
"id": "64b8be74-3aac-4b1d-8b0a-1b8d3c15a9e9",
"url": "http://uri.suomi.fi/codelist/fairdata/license/code/CC-BY-1.0",
"in_scheme": "http://uri.suomi.fi/codelist/fairdata/license",
"pref_label": {
"en": "Creative Commons Attribution 1.0 Generic (CC BY 1.0)",
"fi": "Creative Commons Nimeä 1.0 Yleinen (CC BY 1.0)"
}
}
],
"access_type": {
"id": "b41462f7-00bf-4e50-935b-2bf1184453a7",
"url": "http://uri.suomi.fi/codelist/fairdata/access_type/code/open",
"in_scheme": "http://uri.suomi.fi/codelist/fairdata/access_type",
"pref_label": {
"en": "Open",
"fi": "Avoin"
}
},
"restriction_grounds": []
},
"actors": [
{
"id": "13465057-5ec3-4b08-a198-c701b3d4d42f",
"roles": [
"creator",
"rights_holder"
],
"organization": {
"id": "238666d0-57ac-4522-8420-a5310d8e6dc0",
"pref_label": {
"en": "Prime Minister's Office"
},
"homepage": {
"url": "http://vnk.fi/en/frontpage"
},
"email": "<hidden>"
}
},
{
"id": "cc0807e2-c101-4c0e-9586-ff36dbdf6dee",
"roles": [
"curator"
],
"person": {
"id": "7d488683-d043-4d55-9d45-19e49f4cd069",
"name": "User support FIN-CLARIN",
"email": "<hidden>"
},
"organization": {
"id": "a5f4935c-4c0e-44f6-a7d6-71b946913649",
"pref_label": {
"en": "University of Helsinki",
"fi": "Helsingin yliopisto",
"sv": "Helsingfors universitet",
"und": "Helsingin yliopisto"
},
"url": "http://uri.suomi.fi/codelist/fairdata/organization/code/01901",
"in_scheme": "http://uri.suomi.fi/codelist/fairdata/organization"
}
},
{
"id": "5e0b3cf2-cfe6-442d-9bfd-4adf2a0c157b",
"roles": [
"publisher"
],
"organization": {
"id": "a92e4c7b-2cc3-4172-81c5-ea510641e32c",
"pref_label": {
"en": "Multiple publishers, check distribution rights holders in original metadata by following its persistent identifier"
}
}
}
],
"cumulative_state": 0,
"data_catalog": "urn:nbn:fi:att:data-catalog-harvest-kielipankki",
"description": {
"en": "The \"Hallituskausi 2007–2011\" translation memory is intended for those translating administrative texts between Finnish and English. It includes key policy reports published by the Finnish ministries on their websites. The memory features some 58,000 Finnish-to-English translation segments.\n\nThe tmx format requires a SDL Trados Studio programme. \n\nThe translation segments may contain errors due to the technical conversion process (for example, additional or missing hyphens which could affect the search function). \n\nThe translation memory is for reference use only. Use of the memory calls for case-by-case consideration and it must never be applied as such.\n\nThe Prime Minister’s Office is not responsible for any translation or other errors in the translation memory. \n\nThe resource will be made available for download at https://kielipankki.fi/download/"
},
"field_of_science": [
{
"id": "f7ee5ba6-f6af-48d0-bb0f-ea26851256d2",
"url": "http://www.yso.fi/onto/okm-tieteenala/ta6121",
"in_scheme": "http://www.yso.fi/onto/okm-tieteenala/conceptscheme",
"pref_label": {
"en": "Languages",
"fi": "Kielitieteet",
"sv": "Språkvetenskaper"
}
}
],
"infrastructure": [],
"keyword": [],
"language": [
{
"id": "b0c7eada-5b22-48b8-86c2-16e996ca2681",
"url": "http://lexvo.org/id/iso639-3/fin",
"in_scheme": "http://lexvo.org/id/",
"pref_label": {
"en": "Finnish",
"fi": "suomi",
"sv": "finska"
}
},
{
"id": "8f8d38be-f5ee-490f-a33a-c8ddceef9785",
"url": "http://lexvo.org/id/iso639-3/eng",
"in_scheme": "http://lexvo.org/id/",
"pref_label": {
"en": "English",
"fi": "englanti",
"sv": "engelska"
}
}
],
"metadata_owner": {
"id": "003e43bb-cfd5-43a5-92c8-1a6cbec7f76c",
"organization": "service_kielipankki"
},
"other_identifiers": [],
"persistent_identifier": "urn:nbn:fi:lb-2017090402",
"pid_generated_by_fairdata": false,
"projects": [],
"provenance": [],
"relation": [],
"remote_resources": [],
"spatial": [],
"state": "published",
"temporal": [],
"theme": [],
"title": {
"en": "The \"Hallituskausi 2007–2011\" Translation Memory"
},
"created": "2022-11-17T00:00:00Z",
"modified": "2024-12-04T13:06:35Z",
"dataset_versions": [
{
"id": "2d3fcfc9-f17c-4777-b2f1-7e88bb747a34",
"title": {
"en": "The \"Hallituskausi 2007–2011\" Translation Memory"
},
"persistent_identifier": "urn:nbn:fi:lb-2017090402",
"state": "published",
"created": "2022-11-17T00:00:00Z",
"version": 1
}
],
"published_revision": 14,
"version": 1,
"api_version": 3,
"metadata_repository": "Fairdata"
},
{
"id": "0f20081d-e133-45c3-a1be-8570f66ca094",
"access_rights": {
"id": "f387358a-989e-45af-8bb5-d78aaeab43e9",
"license": [
{
"id": "cef12353-9b39-4c92-8a0c-8de977ac8d28",
"custom_url": "http://urn.fi/urn:nbn:fi:lb-2022050901",
"url": "http://uri.suomi.fi/codelist/fairdata/license/code/ClarinACA+NC-1.0",
"in_scheme": "http://uri.suomi.fi/codelist/fairdata/license",
"pref_label": {
"en": "CLARIN ACA+NC (Academic, Non-Commercial) End-User License 1.0"
}
}
],
"access_type": {
"id": "6311561d-0b53-460b-9fb9-1852ce0761eb",
"url": "http://uri.suomi.fi/codelist/fairdata/access_type/code/restricted",
"in_scheme": "http://uri.suomi.fi/codelist/fairdata/access_type",
"pref_label": {
"en": "Restricted use",
"fi": "Saatavuutta rajoitettu"
}
},
"restriction_grounds": [
{
"id": "d0da98cd-9cd2-4352-b12a-3e3efcb585fd",
"url": "http://uri.suomi.fi/codelist/fairdata/restriction_grounds/code/research",
"in_scheme": "http://uri.suomi.fi/codelist/fairdata/restriction_grounds",
"pref_label": {
"en": "Restricted access for research based on contract",
"fi": "Saatavuutta rajoitettu sopimuksen perusteella vain tutkimuskäyttöön",
"sv": "Begränsad åtkomst på bas av kontrakt ändast för forskningsändamål"
}
}
]
},
"actors": [
{
"id": "7838e423-5a46-4ef2-8847-76c57a197cf1",
"roles": [
"creator",
"rights_holder"
],
"organization": {
"id": "5aa2870a-1e84-4a01-a12e-34a7866c4409",
"pref_label": {
"en": "Finnish Broadcasting Company (Yle)",
"fi": "Yleisradio Oy"
},
"homepage": {
"url": "https://yle.fi/"
},
"email": "<hidden>"
}
},
{
"id": "653c49fd-663f-4394-bf68-6b403bb2c146",
"roles": [
"curator"
],
"person": {
"id": "2820f147-52ef-4432-a455-91568d2ceb23",
"name": "User support FIN-CLARIN",
"email": "<hidden>"
},
"organization": {
"id": "a5f4935c-4c0e-44f6-a7d6-71b946913649",
"pref_label": {
"en": "University of Helsinki",
"fi": "Helsingin yliopisto",
"sv": "Helsingfors universitet",
"und": "Helsingin yliopisto"
},
"url": "http://uri.suomi.fi/codelist/fairdata/organization/code/01901",
"in_scheme": "http://uri.suomi.fi/codelist/fairdata/organization"
}
},
{
"id": "2b2c1d86-0cf9-4fe6-8022-a21ee4eb025c",
"roles": [
"publisher"
],
"organization": {
"id": "507dc512-0be4-457c-a555-958c947c1062",
"pref_label": {
"en": "Multiple publishers, check distribution rights holders in original metadata by following its persistent identifier"
}
}
}
],
"cumulative_state": 0,
"data_catalog": "urn:nbn:fi:att:data-catalog-harvest-kielipankki",
"description": {
"en": "The corpus, containing the articles from YLE https://yle.fi from 2011-2018, is available at korp.csc.fi/download"
},
"field_of_science": [
{
"id": "f7ee5ba6-f6af-48d0-bb0f-ea26851256d2",
"url": "http://www.yso.fi/onto/okm-tieteenala/ta6121",
"in_scheme": "http://www.yso.fi/onto/okm-tieteenala/conceptscheme",
"pref_label": {
"en": "Languages",
"fi": "Kielitieteet",
"sv": "Språkvetenskaper"
}
}
],
"infrastructure": [],
"keyword": [],
"language": [
{
"id": "b0c7eada-5b22-48b8-86c2-16e996ca2681",
"url": "http://lexvo.org/id/iso639-3/fin",
"in_scheme": "http://lexvo.org/id/",
"pref_label": {
"en": "Finnish",
"fi": "suomi",
"sv": "finska"
}
}
],
"metadata_owner": {
"id": "003e43bb-cfd5-43a5-92c8-1a6cbec7f76c",
"organization": "service_kielipankki"
},
"other_identifiers": [],
"persistent_identifier": "urn:nbn:fi:lb-2017070501",
"pid_generated_by_fairdata": false,
"projects": [],
"provenance": [],
"relation": [],
"remote_resources": [],
"spatial": [],
"state": "published",
"temporal": [],
"theme": [],
"title": {
"en": "Yle Finnish News Archive 2011-2018, source",
"fi": "Ylen suomenkielinen uutisarkisto 2011-2018, lähdeaineisto"
},
"created": "2022-12-17T00:00:00Z",
"modified": "2024-12-04T13:06:25Z",
"dataset_versions": [
{
"id": "0f20081d-e133-45c3-a1be-8570f66ca094",
"title": {
"en": "Yle Finnish News Archive 2011-2018, source",
"fi": "Ylen suomenkielinen uutisarkisto 2011-2018, lähdeaineisto"
},
"persistent_identifier": "urn:nbn:fi:lb-2017070501",
"state": "published",
"created": "2022-12-17T00:00:00Z",
"version": 1
}
],
"published_revision": 14,
"version": 1,
"api_version": 3,
"metadata_repository": "Fairdata"
},
{
"id": "79ed8e8b-7370-4413-97ea-26455c6e738e",
"access_rights": {
"id": "90305489-f508-4020-8997-c80a2abf1f8d",
"license": [
{
"id": "cef12353-9b39-4c92-8a0c-8de977ac8d28",
"custom_url": "http://urn.fi/urn:nbn:fi:lb-2015041301",
"url": "http://uri.suomi.fi/codelist/fairdata/license/code/ClarinACA+NC-1.0",
"in_scheme": "http://uri.suomi.fi/codelist/fairdata/license",
"pref_label": {
"en": "CLARIN ACA+NC (Academic, Non-Commercial) End-User License 1.0"
}
}
],
"access_type": {
"id": "6311561d-0b53-460b-9fb9-1852ce0761eb",
"url": "http://uri.suomi.fi/codelist/fairdata/access_type/code/restricted",
"in_scheme": "http://uri.suomi.fi/codelist/fairdata/access_type",
"pref_label": {
"en": "Restricted use",
"fi": "Saatavuutta rajoitettu"
}
},
"restriction_grounds": [
{
"id": "d0da98cd-9cd2-4352-b12a-3e3efcb585fd",
"url": "http://uri.suomi.fi/codelist/fairdata/restriction_grounds/code/research",
"in_scheme": "http://uri.suomi.fi/codelist/fairdata/restriction_grounds",
"pref_label": {
"en": "Restricted access for research based on contract",
"fi": "Saatavuutta rajoitettu sopimuksen perusteella vain tutkimuskäyttöön",
"sv": "Begränsad åtkomst på bas av kontrakt ändast för forskningsändamål"
}
}
]
},
"actors": [
{
"id": "f92b4aa9-f9fd-43db-bc1c-feee6babe2e9",
"roles": [
"creator",
"rights_holder"
],
"person": {
"id": "323ce425-d63e-486c-8278-d95b920dc2fb",
"name": "Paavo Alku",
"email": "<hidden>"
},
"organization": {
"id": "2f9f9011-4061-4a93-96af-768471abada7",
"pref_label": {
"en": "Aalto University",
"fi": "Aalto-yliopisto",
"sv": "Aalto-universitetet",
"und": "Aalto-yliopisto"
},
"url": "http://uri.suomi.fi/codelist/fairdata/organization/code/10076",
"in_scheme": "http://uri.suomi.fi/codelist/fairdata/organization"
}
},
{
"id": "b7577837-3230-434b-94d5-4139ca2a11b7",
"roles": [
"curator"
],
"person": {
"id": "668fbf72-28ef-4d58-8c23-62d2b972d423",
"name": "User support FIN-CLARIN",
"email": "<hidden>"
},
"organization": {
"id": "a5f4935c-4c0e-44f6-a7d6-71b946913649",
"pref_label": {
"en": "University of Helsinki",
"fi": "Helsingin yliopisto",
"sv": "Helsingfors universitet",
"und": "Helsingin yliopisto"
},
"url": "http://uri.suomi.fi/codelist/fairdata/organization/code/01901",
"in_scheme": "http://uri.suomi.fi/codelist/fairdata/organization"
}
},
{
"id": "5971d7b2-f707-4855-8e54-a9eea3321d8c",
"roles": [
"publisher"
],
"organization": {
"id": "acdb7fff-aa12-45bf-a1f6-9f2a8b95233b",
"pref_label": {
"en": "Multiple publishers, check distribution rights holders in original metadata by following its persistent identifier"
}
}
}
],
"cumulative_state": 0,
"data_catalog": "urn:nbn:fi:att:data-catalog-harvest-kielipankki",
"description": {
"en": "The corpus was available via the LAT platform in Kielipankki - the Language Bank of Finland until December 2020. Since the LAT platform was discontinued, this corpus version can no longer be used, but a downloadable version of the same content will be maintained (see Relations). Further details about the most recent version are available on the info page of this corpus family (https://www.kielipankki.fi/aineistot/aku-egg) \n\nThe corpus contains continuous speech during which glottal activity was recorded by means of electroglottography.\n\nLicense details: http://urn.fi/urn:nbn:fi:lb-2015041301.\n\nlog\n26.11.2018 link http://islrn.org/resources/604-045-695-230-7 removed",
"fi": "Tämä aineisto oli saatavilla Kielipankin LAT-alustan kautta joulukuuhun 2020 saakka, jolloin LAT-alusta poistui käytöstä. Aineistosta on kuitenkin saatavilla vastaavan sisältöinen ladattava versio (ks. Relations). Lisätietoja löytyy aineiston tietosivulta Kielipankissa (ks. https://www.kielipankki.fi/aineistot/aku-egg).\n\nKorpus sisältää jatkuvaa puhetta, jonka aikana on rekisteröity kurkunpään (äänihuulten) toimintaa elektroglottografian avulla.\n\nLisätietoa kielivaran lisenssistä: http://urn.fi/urn:nbn:fi:lb-2015041301\n\n\nlog\n26.11.2018 linkki http://islrn.org/resources/604-045-695-230-7 poistettu"
},
"field_of_science": [
{
"id": "f7ee5ba6-f6af-48d0-bb0f-ea26851256d2",
"url": "http://www.yso.fi/onto/okm-tieteenala/ta6121",
"in_scheme": "http://www.yso.fi/onto/okm-tieteenala/conceptscheme",
"pref_label": {
"en": "Languages",
"fi": "Kielitieteet",
"sv": "Språkvetenskaper"
}
}
],
"infrastructure": [],
"keyword": [],
"language": [
{
"id": "b0c7eada-5b22-48b8-86c2-16e996ca2681",
"url": "http://lexvo.org/id/iso639-3/fin",
"in_scheme": "http://lexvo.org/id/",
"pref_label": {
"en": "Finnish",
"fi": "suomi",
"sv": "finska"
}
}
],
"metadata_owner": {
"id": "003e43bb-cfd5-43a5-92c8-1a6cbec7f76c",
"organization": "service_kielipankki"
},
"other_identifiers": [],
"persistent_identifier": "urn:nbn:fi:lb-20140730182",
"pid_generated_by_fairdata": false,
"projects": [],
"provenance": [],
"relation": [],
"remote_resources": [],
"spatial": [],
"state": "published",
"temporal": [],
"theme": [],
"title": {
"en": "Speech and EGG (Electroglottography) Simultaneous Recordings",
"fi": "Puheen ja EGG:n samanaikaiset tallenteet"
},
"created": "2023-04-05T00:00:00Z",
"modified": "2024-12-04T13:03:19Z",
"dataset_versions": [
{
"id": "79ed8e8b-7370-4413-97ea-26455c6e738e",
"title": {
"en": "Speech and EGG (Electroglottography) Simultaneous Recordings",
"fi": "Puheen ja EGG:n samanaikaiset tallenteet"
},
"persistent_identifier": "urn:nbn:fi:lb-20140730182",
"state": "published",
"created": "2023-04-05T00:00:00Z",
"version": 1
}
],
"published_revision": 16,
"version": 1,
"api_version": 3,
"metadata_repository": "Fairdata"
},
{
"id": "e5324440-ed23-4c52-b914-a04f5e51f814",
"access_rights": {
"id": "0947af4e-7f2c-4dd0-b7a6-003b99176058",
"license": [
{
"id": "fc8a7008-cf5e-4df1-9e05-0145d445c80e",
"url": "http://uri.suomi.fi/codelist/fairdata/license/code/ClarinRES-1.0",
"in_scheme": "http://uri.suomi.fi/codelist/fairdata/license",
"pref_label": {
"en": "CLARIN RES (Restricted) End-User License 1.0"
}
}
],
"access_type": {
"id": "6311561d-0b53-460b-9fb9-1852ce0761eb",
"url": "http://uri.suomi.fi/codelist/fairdata/access_type/code/restricted",
"in_scheme": "http://uri.suomi.fi/codelist/fairdata/access_type",
"pref_label": {
"en": "Restricted use",
"fi": "Saatavuutta rajoitettu"
}
},
"restriction_grounds": [
{
"id": "41b4ff93-e15e-4829-8553-49e4ef079d69",
"url": "http://uri.suomi.fi/codelist/fairdata/restriction_grounds/code/other",
"in_scheme": "http://uri.suomi.fi/codelist/fairdata/restriction_grounds",
"pref_label": {
"en": "Restricted access due to other reasons",
"fi": "Saatavuutta rajoitettu muulla perusteella",
"sv": "Begränsad åtkomst av övriga skäl"
}
}
]
},
"actors": [
{
"id": "d42a70c7-e215-4798-ad84-7bcb6d0542dd",
"roles": [
"creator"
],
"person": {
"id": "7bc01893-132c-41d0-9408-ad03fddc890b",
"name": "Matti Rahkonen",
"email": "<hidden>"
},
"organization": {
"id": "bdd6cb24-71fe-40e0-9ba0-bfb1ea32b50a",
"pref_label": {
"en": "University of Jyväskylä",
"fi": "Jyväskylän yliopisto",
"sv": "Jyväskylä universitet",
"und": "Jyväskylän yliopisto"
},
"url": "http://uri.suomi.fi/codelist/fairdata/organization/code/01906",
"in_scheme": "http://uri.suomi.fi/codelist/fairdata/organization"
}
},
{
"id": "4712c574-515c-4499-b83a-c9317c7405b2",
"roles": [
"curator"
],
"person": {
"id": "65434134-6904-479e-a368-77b5fbba3ce3",
"name": "User support FIN-CLARIN",
"email": "<hidden>"
},
"organization": {
"id": "a5f4935c-4c0e-44f6-a7d6-71b946913649",
"pref_label": {
"en": "University of Helsinki",
"fi": "Helsingin yliopisto",
"sv": "Helsingfors universitet",
"und": "Helsingin yliopisto"
},
"url": "http://uri.suomi.fi/codelist/fairdata/organization/code/01901",
"in_scheme": "http://uri.suomi.fi/codelist/fairdata/organization"
}
},
{
"id": "4f1668a4-d136-452d-9c53-619640e4ca6c",
"roles": [
"rights_holder"
],
"organization": {
"id": "f1a57e80-e3f3-4353-bd7d-67f252564fca",
"pref_label": {
"en": "The Matriculation Examination Board"
},
"homepage": {
"url": "https://www.ylioppilastutkinto.fi/fi/english"
},
"email": "<hidden>"
}
},
{
"id": "a742616b-352b-490d-9ef4-fe33d97116c9",
"roles": [
"publisher"
],
"organization": {
"id": "570ca63c-2128-42ee-ad9a-f29911bafa19",
"pref_label": {
"en": "Multiple publishers, check distribution rights holders in original metadata by following its persistent identifier"
}
}
}
],
"cumulative_state": 0,
"data_catalog": "urn:nbn:fi:att:data-catalog-harvest-kielipankki",
"description": {
"en": "Finnish language essays / compositions written by Finnish-speaking students taking the matriculation examination in 1986. The corpus is available in Kielipankki - the Language Bank of Finland in Korp (https://korp.csc.fi/).\n\nCorpus access instruction: https://www.kielipankki.fi/support/corpus-location/ (in Finnish: https://www.kielipankki.fi/tuki/kayttooikeudet/) \n\nlog\n25.11.2018 link http://islrn.org/resources/596-037-998-620-1 removed\n\nInstructions on how to access Kielipankki corpora: https://www.kielipankki.fi/support/corpus-location/ (in Finnish: https://www.kielipankki.fi/tuki/aineiston-sijainti-kielipankissa/) \n\nImportant: when applying for the resource at https://lbr.csc.fi/ you are to provide, at the request of the IPR holder of the corpus (The Matriculation Examination Board of Finland), also information on your home address. Undergraduate students should provide their supervisor's email address as well. \n\nFor details on the license see http://urn.fi/urn:nbn:fi:lb-2016041801 (in Finnish: http://urn.fi/urn:nbn:fi:lb-2016041802)"
},
"field_of_science": [
{
"id": "f7ee5ba6-f6af-48d0-bb0f-ea26851256d2",
"url": "http://www.yso.fi/onto/okm-tieteenala/ta6121",
"in_scheme": "http://www.yso.fi/onto/okm-tieteenala/conceptscheme",
"pref_label": {
"en": "Languages",
"fi": "Kielitieteet",
"sv": "Språkvetenskaper"
}
}
],
"infrastructure": [],
"keyword": [],
"language": [
{
"id": "b0c7eada-5b22-48b8-86c2-16e996ca2681",
"url": "http://lexvo.org/id/iso639-3/fin",
"in_scheme": "http://lexvo.org/id/",
"pref_label": {
"en": "Finnish",
"fi": "suomi",
"sv": "finska"
}
}
],
"metadata_owner": {
"id": "003e43bb-cfd5-43a5-92c8-1a6cbec7f76c",
"organization": "service_kielipankki"
},
"other_identifiers": [],
"persistent_identifier": "urn:nbn:fi:lb-20140730158",
"pid_generated_by_fairdata": false,
"projects": [],
"provenance": [],
"relation": [],
"remote_resources": [],
"spatial": [],
"state": "published",
"temporal": [],
"theme": [],
"title": {
"en": "FinStud86 Corpus"
},
"created": "2022-11-17T00:00:00Z",
"modified": "2024-12-04T13:03:15Z",
"dataset_versions": [
{
"id": "e5324440-ed23-4c52-b914-a04f5e51f814",
"title": {
"en": "FinStud86 Corpus"
},
"persistent_identifier": "urn:nbn:fi:lb-20140730158",
"state": "published",
"created": "2022-11-17T00:00:00Z",
"version": 1
}
],
"published_revision": 16,
"version": 1,
"api_version": 3,
"metadata_repository": "Fairdata"
},
{
"id": "a5feafcb-b10e-453a-8f41-2f96c6527274",
"access_rights": {
"id": "fc31e0be-f0ce-4e15-a423-c72a92446273",
"license": [
{
"id": "cef12353-9b39-4c92-8a0c-8de977ac8d28",
"custom_url": "http://urn.fi/urn:nbn:fi:lb-201806052",
"url": "http://uri.suomi.fi/codelist/fairdata/license/code/ClarinACA+NC-1.0",
"in_scheme": "http://uri.suomi.fi/codelist/fairdata/license",
"pref_label": {
"en": "CLARIN ACA+NC (Academic, Non-Commercial) End-User License 1.0"
}
}
],
"access_type": {
"id": "6311561d-0b53-460b-9fb9-1852ce0761eb",
"url": "http://uri.suomi.fi/codelist/fairdata/access_type/code/restricted",
"in_scheme": "http://uri.suomi.fi/codelist/fairdata/access_type",
"pref_label": {
"en": "Restricted use",
"fi": "Saatavuutta rajoitettu"
}
},
"restriction_grounds": [
{
"id": "d0da98cd-9cd2-4352-b12a-3e3efcb585fd",
"url": "http://uri.suomi.fi/codelist/fairdata/restriction_grounds/code/research",
"in_scheme": "http://uri.suomi.fi/codelist/fairdata/restriction_grounds",
"pref_label": {
"en": "Restricted access for research based on contract",
"fi": "Saatavuutta rajoitettu sopimuksen perusteella vain tutkimuskäyttöön",
"sv": "Begränsad åtkomst på bas av kontrakt ändast för forskningsändamål"
}
}
]
},
"actors": [
{
"id": "7441b7f9-2946-4eeb-9a15-aec04b3db05d",
"roles": [
"creator",
"rights_holder"
],
"person": {
"id": "39b2af8b-53a5-4c74-82dc-60f7129bd7e7",
"name": "Hurskainen Arvi",
"email": "<hidden>"
},
"organization": {
"id": "a5f4935c-4c0e-44f6-a7d6-71b946913649",
"pref_label": {
"en": "University of Helsinki",
"fi": "Helsingin yliopisto",
"sv": "Helsingfors universitet",
"und": "Helsingin yliopisto"
},
"url": "http://uri.suomi.fi/codelist/fairdata/organization/code/01901",
"in_scheme": "http://uri.suomi.fi/codelist/fairdata/organization"
}
},
{
"id": "4fa12b5e-2e67-45fd-9458-bfabff749e53",
"roles": [
"publisher"
],
"organization": {
"id": "a5f4935c-4c0e-44f6-a7d6-71b946913649",
"pref_label": {
"en": "University of Helsinki",
"fi": "Helsingin yliopisto",
"sv": "Helsingfors universitet",
"und": "Helsingin yliopisto"
},
"url": "http://uri.suomi.fi/codelist/fairdata/organization/code/01901",
"in_scheme": "http://uri.suomi.fi/codelist/fairdata/organization"
}
},
{
"id": "a80c3137-15c9-4569-9920-8c1b2943fb6c",
"roles": [
"curator"
],
"person": {
"id": "bda59a8e-dda2-4520-89a7-4b5dc6045b90",
"name": "User support at CSC - IT Center for Science Ltd. The Language Bank of Finland",
"email": "<hidden>"
},
"organization": {
"id": "477dfdb6-563e-41d9-badc-ec3975f79256",
"pref_label": {
"en": "CSC – IT Center for Science",
"fi": "CSC - Tieteen tietotekniikan keskus Oy",
"sv": "CSC – IT Center for Science",
"und": "CSC - Tieteen tietotekniikan keskus Oy"
},
"url": "http://uri.suomi.fi/codelist/fairdata/organization/code/09206320",
"in_scheme": "http://uri.suomi.fi/codelist/fairdata/organization"
}
}
],
"cumulative_state": 0,
"data_catalog": "urn:nbn:fi:att:data-catalog-harvest-kielipankki",
"description": {
"en": "The corpus is available for Download at Kielipankki - the Language Bank of Finland at http://urn.fi/urn:nbn:fi:lb-201803272\n\nThis is the downloadable version of Helsinki Corpus of Swahili 2.0 (HCS 2.0) Annotated Version, see http://urn.fi/urn:nbn:fi:lb-2016011301 for more information.\n\nThe Helsinki Corpus of Swahili 2.0 Annotated Version containing about 25 million words will be available in Kielipankki - the Language Bank of Finland in Download (https://korp.csc.fi/download) for academic use. This means that students and staff of universities can use the corpus by simply logging in with their university credentials. Alumni have the option to apply for access via https://lbr.csc.fi.\n\nAccess rights instructions: https://www.kielipankki.fi/support/access (in Finnish: https://www.kielipankki.fi/tuki/kayttooikeudet/).\n\nInstructions on how to access Kielipankki corpora: https://www.kielipankki.fi/support/corpus-location/ (in Finnish: https://www.kielipankki.fi/tuki/aineiston-sijainti/).\n\nThe corpus contains various kinds of linguistic information attached to each token. The corpus was annotated using the Salama Tagger.\n\nPreparation of the material\n\nMost of the corpus material was retrieved from the Web. This method was used increasingly after texts in the Web became available. Only texts in news media and on open government pages were retrieved. Some types of texts, such as books, were scanned and proofread. Part of the oldest news material before the time of scanners in the 1980’ies was manually typed.\n\nThe corpus material has gone through a series of formatting and correction routines.\n\n1. Converting the text into ascii-format, required by the tagger. There is a wild variety of codes for describing diacritics in Web texts. These had to be formalized.\n2. Proofreading and correcting the text with a speller.\n3. Analyzing the proofread text for finding still remaining typos and possibly new words.\n4. Constructing a correction program that automatically corrects such typos that can be safely corrected. More than 8000 such mistake types were identified.\n5. New words found in corpus were added to the parser.\n6. Texts were corrected using the constructed correction program.\n7. Metadata in text files were formalized.\n8. Texts were converted into sentence-per-line format.\n9. Text within each file was randomly shuffled to mix the sentence order.\n\nThe result of these routines comprises the Helsinki Corpus of Swahili 2.0 Not Annotated Version.\n\nThe result of these routines was annotated with Salama Tagger, thus producing the Korp format of the corpus.\n\nMetadata were added to each file.\n\nStructure of the corpus\n\nHCS 2.0 contains the following types of material:\n\nOld material\n\n1. Books\n2. News\nNew material\n1. Bunge\n2. News\n\nOld material contains material before 2003. Much of this material is in Helsinki Corpus of Swahili 1.0. The big difference is, however, that while in the earlier corpus only sections of books were included, in the new corpus whole texts are included. The other difference is that while in the old corpus text sections are in the original order, in the new corpus sentences are randomly shuffled.\n\nMost of the new material consists of news texts from 2004-2015. The section ‘Bunge’ contains Hansards of the Tanzanian Parliament from the years 2004, 2005 and 2006. Metadata in the beginning of each file give more information. Also the names of the files give hints of the contents of the files.\n\nA word in the annotated corpus contains normally the following types of information:\n\n1. token\n2. stem\n3. part-of-speech\n4. morphological description\n5. gloss in English\n6. syntactic tag\n7. rest of verb description\n\nThe last point concerns only verbs.\n\nDetailed license information: See Documentation section below."
},
"field_of_science": [
{
"id": "f7ee5ba6-f6af-48d0-bb0f-ea26851256d2",
"url": "http://www.yso.fi/onto/okm-tieteenala/ta6121",
"in_scheme": "http://www.yso.fi/onto/okm-tieteenala/conceptscheme",
"pref_label": {
"en": "Languages",
"fi": "Kielitieteet",
"sv": "Språkvetenskaper"
}
}
],
"infrastructure": [],
"keyword": [],
"language": [
{
"id": "3dc7924d-d652-468f-9ff5-67292f0b18ad",
"url": "http://lexvo.org/id/iso639-3/swa",
"in_scheme": "http://lexvo.org/id/",
"pref_label": {
"en": "Swahili",
"fi": "Swahilin kieli",
"sv": "Swahili"
}
}
],
"metadata_owner": {
"id": "003e43bb-cfd5-43a5-92c8-1a6cbec7f76c",
"organization": "service_kielipankki"
},
"other_identifiers": [],
"persistent_identifier": "urn:nbn:fi:lb-201803271",
"pid_generated_by_fairdata": false,
"projects": [],
"provenance": [],
"relation": [],
"remote_resources": [],
"spatial": [],
"state": "published",
"temporal": [],
"theme": [],
"title": {
"en": "Helsinki Corpus of Swahili 2.0 (HCS 2.0) Downloadable Annotated Version",
"fi": "Helsinki Swahili -korpus 2.0 (HCS 2.0), ladattava annotoitu versio"
},
"created": "2022-11-17T00:00:00Z",
"modified": "2024-12-04T12:57:17Z",
"dataset_versions": [
{
"id": "a5feafcb-b10e-453a-8f41-2f96c6527274",
"title": {
"en": "Helsinki Corpus of Swahili 2.0 (HCS 2.0) Downloadable Annotated Version",
"fi": "Helsinki Swahili -korpus 2.0 (HCS 2.0), ladattava annotoitu versio"
},
"persistent_identifier": "urn:nbn:fi:lb-201803271",
"state": "published",
"created": "2022-11-17T00:00:00Z",
"version": 1
}
],
"published_revision": 14,
"version": 1,
"api_version": 3,
"metadata_repository": "Fairdata"
},
{
"id": "c8596ad5-1975-4b50-9634-e1c5f5afd7fd",
"access_rights": {
"id": "fddb6819-0b18-4f7c-ad16-118d13ac5f28",
"license": [
{
"id": "edc3e7d1-0e14-490e-a7dd-6ace3dcb3673",
"url": "http://uri.suomi.fi/codelist/fairdata/license/code/CC0-1.0",
"in_scheme": "http://uri.suomi.fi/codelist/fairdata/license",
"pref_label": {
"en": "Creative Commons CC0 1.0 Universal (CC0 1.0) Public Domain Dedication",
"fi": "Creative Commons Yleismaailmallinen (CC0 1.0) Public Domain -lausuma"
}
}
],
"access_type": {
"id": "b41462f7-00bf-4e50-935b-2bf1184453a7",
"url": "http://uri.suomi.fi/codelist/fairdata/access_type/code/open",
"in_scheme": "http://uri.suomi.fi/codelist/fairdata/access_type",
"pref_label": {
"en": "Open",
"fi": "Avoin"
}
},
"restriction_grounds": []
},
"actors": [
{
"id": "5126385c-af7b-4aea-a390-cb9e29cb54da",
"roles": [
"creator",
"rights_holder"
],
"person": {
"id": "ee193871-543c-414a-8831-cd5224a00021",
"name": "Mietta Lennes",
"email": "<hidden>"
},
"organization": {
"id": "a5f4935c-4c0e-44f6-a7d6-71b946913649",
"pref_label": {
"en": "University of Helsinki",
"fi": "Helsingin yliopisto",
"sv": "Helsingfors universitet",
"und": "Helsingin yliopisto"
},
"url": "http://uri.suomi.fi/codelist/fairdata/organization/code/01901",
"in_scheme": "http://uri.suomi.fi/codelist/fairdata/organization"
}
},
{
"id": "b0114513-ca9e-43db-a4c3-a9bc30256483",
"roles": [
"creator"
],
"person": {
"id": "681a48e1-e619-4561-befb-3ee7bb2ffe6c",
"name": "Jussi Piitulainen",
"email": "<hidden>"
},
"organization": {
"id": "a5f4935c-4c0e-44f6-a7d6-71b946913649",
"pref_label": {
"en": "University of Helsinki",
"fi": "Helsingin yliopisto",
"sv": "Helsingfors universitet",
"und": "Helsingin yliopisto"
},
"url": "http://uri.suomi.fi/codelist/fairdata/organization/code/01901",
"in_scheme": "http://uri.suomi.fi/codelist/fairdata/organization"
}
},
{
"id": "cefd1004-28f6-46be-a0d6-dcc749684c94",
"roles": [
"creator",
"rights_holder"
],
"person": {
"id": "36d271d7-8d6a-44d2-b794-5cd1d40d0138",
"name": "Tero Aalto",
"email": "<hidden>"
},
"organization": {
"id": "477dfdb6-563e-41d9-badc-ec3975f79256",
"pref_label": {
"en": "CSC – IT Center for Science",
"fi": "CSC - Tieteen tietotekniikan keskus Oy",
"sv": "CSC – IT Center for Science",
"und": "CSC - Tieteen tietotekniikan keskus Oy"
},
"url": "http://uri.suomi.fi/codelist/fairdata/organization/code/09206320",
"in_scheme": "http://uri.suomi.fi/codelist/fairdata/organization"
}
},
{
"id": "fbbd6ea0-39c1-4af0-a485-165cadb76fa8",
"roles": [
"publisher"
],
"organization": {
"id": "a5f4935c-4c0e-44f6-a7d6-71b946913649",
"pref_label": {
"en": "University of Helsinki",
"fi": "Helsingin yliopisto",
"sv": "Helsingfors universitet",
"und": "Helsingin yliopisto"
},
"url": "http://uri.suomi.fi/codelist/fairdata/organization/code/01901",
"in_scheme": "http://uri.suomi.fi/codelist/fairdata/organization"
}
},
{
"id": "d41dacd2-a3bb-4cdf-bc9a-6f069d563c26",
"roles": [
"curator"
],
"person": {
"id": "9d355f04-67e0-4f39-b492-83542f9365f0",
"name": "User support at CSC - IT Center for Science Ltd. The Language Bank of Finland",
"email": "<hidden>"
},
"organization": {
"id": "477dfdb6-563e-41d9-badc-ec3975f79256",
"pref_label": {
"en": "CSC – IT Center for Science",
"fi": "CSC - Tieteen tietotekniikan keskus Oy",
"sv": "CSC – IT Center for Science",
"und": "CSC - Tieteen tietotekniikan keskus Oy"
},
"url": "http://uri.suomi.fi/codelist/fairdata/organization/code/09206320",
"in_scheme": "http://uri.suomi.fi/codelist/fairdata/organization"
}
},
{
"id": "06e825dd-9df8-4d1f-a6f7-573025013741",
"roles": [
"rights_holder"
],
"person": {
"id": "e2114598-c51b-4e9d-bb65-f6ee6baa55c8",
"name": "Jussi Piitulainen",
"email": "<hidden>"
},
"organization": {
"id": "a5f4935c-4c0e-44f6-a7d6-71b946913649",
"pref_label": {
"en": "University of Helsinki",
"fi": "Helsingin yliopisto",
"sv": "Helsingfors universitet",
"und": "Helsingin yliopisto"
},
"url": "http://uri.suomi.fi/codelist/fairdata/organization/code/01901",
"in_scheme": "http://uri.suomi.fi/codelist/fairdata/organization"
}
}
],
"cumulative_state": 0,
"data_catalog": "urn:nbn:fi:att:data-catalog-harvest-kielipankki",
"description": {
"en": "The resource, which is the Helsinki Korp version of the Route to A wing Corpus, is available in Kielipankki - the Language Bank of Finland at http://urn.fi/urn:nbn:fi:lb-2015050502\n\nFor more information see http://urn.fi/urn:nbn:fi:lb-2014101401"
},
"field_of_science": [
{
"id": "f7ee5ba6-f6af-48d0-bb0f-ea26851256d2",
"url": "http://www.yso.fi/onto/okm-tieteenala/ta6121",
"in_scheme": "http://www.yso.fi/onto/okm-tieteenala/conceptscheme",
"pref_label": {
"en": "Languages",
"fi": "Kielitieteet",
"sv": "Språkvetenskaper"
}
}
],
"infrastructure": [],
"keyword": [],
"language": [
{
"id": "b0c7eada-5b22-48b8-86c2-16e996ca2681",
"url": "http://lexvo.org/id/iso639-3/fin",
"in_scheme": "http://lexvo.org/id/",
"pref_label": {
"en": "Finnish",
"fi": "suomi",
"sv": "finska"
}
}
],
"metadata_owner": {
"id": "003e43bb-cfd5-43a5-92c8-1a6cbec7f76c",
"organization": "service_kielipankki"
},
"other_identifiers": [],
"persistent_identifier": "urn:nbn:fi:lb-2016042614",
"pid_generated_by_fairdata": false,
"projects": [],
"provenance": [],
"relation": [],
"remote_resources": [],
"spatial": [],
"state": "published",
"temporal": [],
"theme": [],
"title": {
"en": "The Helsinki Korp Version of the Route to A wing Corpus",
"fi": "Reitti A-siipeen -korpuksen Helsinki-Korp-versio"
},
"created": "2023-04-05T00:00:00Z",
"modified": "2024-12-04T12:53:57Z",
"dataset_versions": [
{
"id": "c8596ad5-1975-4b50-9634-e1c5f5afd7fd",
"title": {
"en": "The Helsinki Korp Version of the Route to A wing Corpus",
"fi": "Reitti A-siipeen -korpuksen Helsinki-Korp-versio"
},
"persistent_identifier": "urn:nbn:fi:lb-2016042614",
"state": "published",
"created": "2023-04-05T00:00:00Z",
"version": 1
}
],
"published_revision": 15,
"version": 1,
"api_version": 3,
"metadata_repository": "Fairdata"
},
{
"id": "d2f2386a-fc21-499d-bafc-9378346a39c1",
"access_rights": {
"id": "baf54dea-3271-4212-a474-e40bba6b5525",
"license": [
{
"id": "edc3e7d1-0e14-490e-a7dd-6ace3dcb3673",
"url": "http://uri.suomi.fi/codelist/fairdata/license/code/CC0-1.0",
"in_scheme": "http://uri.suomi.fi/codelist/fairdata/license",
"pref_label": {
"en": "Creative Commons CC0 1.0 Universal (CC0 1.0) Public Domain Dedication",
"fi": "Creative Commons Yleismaailmallinen (CC0 1.0) Public Domain -lausuma"
}
}
],
"access_type": {
"id": "b41462f7-00bf-4e50-935b-2bf1184453a7",
"url": "http://uri.suomi.fi/codelist/fairdata/access_type/code/open",
"in_scheme": "http://uri.suomi.fi/codelist/fairdata/access_type",
"pref_label": {
"en": "Open",
"fi": "Avoin"
}
},
"restriction_grounds": []
},
"actors": [
{
"id": "46a7f148-6725-4785-a406-f73176c89d53",
"roles": [
"creator",
"rights_holder"
],
"person": {
"id": "2655eef4-bb6b-40d7-9689-dbc6c577e554",
"name": "Mietta Lennes",
"email": "<hidden>"
},
"organization": {
"id": "a5f4935c-4c0e-44f6-a7d6-71b946913649",
"pref_label": {
"en": "University of Helsinki",
"fi": "Helsingin yliopisto",
"sv": "Helsingfors universitet",
"und": "Helsingin yliopisto"
},
"url": "http://uri.suomi.fi/codelist/fairdata/organization/code/01901",
"in_scheme": "http://uri.suomi.fi/codelist/fairdata/organization"
}
},
{
"id": "610ed2c8-0aab-4de3-89ac-080cae3fc254",
"roles": [
"creator"
],
"person": {
"id": "177756aa-56cd-4a1a-8bd1-278740d6ac2a",
"name": "Jussi Piitulainen",
"email": "<hidden>"
},
"organization": {
"id": "a5f4935c-4c0e-44f6-a7d6-71b946913649",
"pref_label": {
"en": "University of Helsinki",
"fi": "Helsingin yliopisto",
"sv": "Helsingfors universitet",
"und": "Helsingin yliopisto"
},
"url": "http://uri.suomi.fi/codelist/fairdata/organization/code/01901",
"in_scheme": "http://uri.suomi.fi/codelist/fairdata/organization"
}
},
{
"id": "5a186eea-b4e1-42c1-9c55-177a48e9905a",
"roles": [
"creator",
"rights_holder"
],
"person": {
"id": "d690219d-938c-43a3-b7d1-e9536d234138",
"name": "Tero Aalto",
"email": "<hidden>"
},
"organization": {
"id": "477dfdb6-563e-41d9-badc-ec3975f79256",
"pref_label": {
"en": "CSC – IT Center for Science",
"fi": "CSC - Tieteen tietotekniikan keskus Oy",
"sv": "CSC – IT Center for Science",
"und": "CSC - Tieteen tietotekniikan keskus Oy"
},
"url": "http://uri.suomi.fi/codelist/fairdata/organization/code/09206320",
"in_scheme": "http://uri.suomi.fi/codelist/fairdata/organization"
}
},
{
"id": "7f1599bd-aaa6-4b8f-8068-a93daa619460",
"roles": [
"publisher"
],
"organization": {
"id": "a5f4935c-4c0e-44f6-a7d6-71b946913649",
"pref_label": {
"en": "University of Helsinki",
"fi": "Helsingin yliopisto",
"sv": "Helsingfors universitet",
"und": "Helsingin yliopisto"
},
"url": "http://uri.suomi.fi/codelist/fairdata/organization/code/01901",
"in_scheme": "http://uri.suomi.fi/codelist/fairdata/organization"
}
},
{
"id": "f2d1e04b-175b-4f83-9e4f-404536fec42c",
"roles": [
"curator"
],
"person": {
"id": "0067ec3f-8425-462d-8edb-3bb2a874f324",
"name": "User support at CSC - IT Center for Science Ltd. The Language Bank of Finland",
"email": "<hidden>"
},
"organization": {
"id": "477dfdb6-563e-41d9-badc-ec3975f79256",
"pref_label": {
"en": "CSC – IT Center for Science",
"fi": "CSC - Tieteen tietotekniikan keskus Oy",
"sv": "CSC – IT Center for Science",
"und": "CSC - Tieteen tietotekniikan keskus Oy"
},
"url": "http://uri.suomi.fi/codelist/fairdata/organization/code/09206320",
"in_scheme": "http://uri.suomi.fi/codelist/fairdata/organization"
}
},
{
"id": "d744a548-a01e-4485-a542-021e2a88c4ed",
"roles": [
"rights_holder"
],
"person": {
"id": "06f9c592-8156-4ba5-bfbc-4ce4430850a8",
"name": "Jussi Piitulainen",
"email": "<hidden>"
},
"organization": {
"id": "a5f4935c-4c0e-44f6-a7d6-71b946913649",
"pref_label": {
"en": "University of Helsinki",
"fi": "Helsingin yliopisto",
"sv": "Helsingfors universitet",
"und": "Helsingin yliopisto"
},
"url": "http://uri.suomi.fi/codelist/fairdata/organization/code/01901",
"in_scheme": "http://uri.suomi.fi/codelist/fairdata/organization"
}
}
],
"cumulative_state": 0,
"data_catalog": "urn:nbn:fi:att:data-catalog-harvest-kielipankki",
"description": {
"en": "Until November 2020, this corpus was available via the LAT platform in Kielipankki - the Language Bank of Finland (see Access location).\n\nA downloadable version of the same content is available (see Relations). Further details about the most recent version are available on the info page of this corpus family (https://www.kielipankki.fi/aineistot/reittidemo/, in Finnish).",
"fi": "Tämä aineisto oli saatavilla Kielipankin LAT-alustan kautta marraskuuhun 2020 saakka (ks. Access location).\n\nAineistosta on saatavilla vastaavan sisältöinen ladattava versio (ks. Relations). Lisätietoja löytyy aineiston tietosivulta Kielipankissa (ks. https://www.kielipankki.fi/aineistot/reittidemo)"
},
"field_of_science": [
{
"id": "f7ee5ba6-f6af-48d0-bb0f-ea26851256d2",
"url": "http://www.yso.fi/onto/okm-tieteenala/ta6121",
"in_scheme": "http://www.yso.fi/onto/okm-tieteenala/conceptscheme",
"pref_label": {
"en": "Languages",
"fi": "Kielitieteet",
"sv": "Språkvetenskaper"
}
}
],
"infrastructure": [],
"keyword": [],
"language": [
{
"id": "b0c7eada-5b22-48b8-86c2-16e996ca2681",
"url": "http://lexvo.org/id/iso639-3/fin",
"in_scheme": "http://lexvo.org/id/",
"pref_label": {
"en": "Finnish",
"fi": "suomi",
"sv": "finska"
}
}
],
"metadata_owner": {
"id": "003e43bb-cfd5-43a5-92c8-1a6cbec7f76c",
"organization": "service_kielipankki"
},
"other_identifiers": [],
"persistent_identifier": "urn:nbn:fi:lb-2016042613",
"pid_generated_by_fairdata": false,
"projects": [],
"provenance": [],
"relation": [],
"remote_resources": [],
"spatial": [],
"state": "published",
"temporal": [],
"theme": [],
"title": {
"en": "The Helsinki LAT Version of the Route to A wing Corpus",
"fi": "Reitti A-siipeen -korpuksen Helsinki-LAT-versio"
},
"created": "2023-09-09T00:00:00Z",
"modified": "2024-12-04T12:53:53Z",
"dataset_versions": [
{
"id": "d2f2386a-fc21-499d-bafc-9378346a39c1",
"title": {
"en": "The Helsinki LAT Version of the Route to A wing Corpus",
"fi": "Reitti A-siipeen -korpuksen Helsinki-LAT-versio"
},
"persistent_identifier": "urn:nbn:fi:lb-2016042613",
"state": "published",
"created": "2023-09-09T00:00:00Z",
"version": 1
}
],
"published_revision": 15,
"version": 1,
"api_version": 3,
"metadata_repository": "Fairdata"
},
{
"id": "143c69e4-9347-45bb-926d-f16faf47768d",
"access_rights": {
"id": "1bf9d675-1898-4a30-8561-9bae8f3a4655",
"license": [
{
"id": "fc8a7008-cf5e-4df1-9e05-0145d445c80e",
"custom_url": "http://urn.fi/urn:nbn:fi:lb-20150304134",
"url": "http://uri.suomi.fi/codelist/fairdata/license/code/ClarinRES-1.0",
"in_scheme": "http://uri.suomi.fi/codelist/fairdata/license",
"pref_label": {
"en": "CLARIN RES (Restricted) End-User License 1.0"
}
}
],
"access_type": {
"id": "6311561d-0b53-460b-9fb9-1852ce0761eb",
"url": "http://uri.suomi.fi/codelist/fairdata/access_type/code/restricted",
"in_scheme": "http://uri.suomi.fi/codelist/fairdata/access_type",
"pref_label": {
"en": "Restricted use",
"fi": "Saatavuutta rajoitettu"
}
},
"restriction_grounds": [
{
"id": "41b4ff93-e15e-4829-8553-49e4ef079d69",
"url": "http://uri.suomi.fi/codelist/fairdata/restriction_grounds/code/other",
"in_scheme": "http://uri.suomi.fi/codelist/fairdata/restriction_grounds",
"pref_label": {
"en": "Restricted access due to other reasons",
"fi": "Saatavuutta rajoitettu muulla perusteella",
"sv": "Begränsad åtkomst av övriga skäl"
}
}
]
},
"actors": [
{
"id": "d7c057cc-0076-4cee-aa60-354a285586c3",
"roles": [
"creator",
"rights_holder"
],
"organization": {
"id": "bdd6cb24-71fe-40e0-9ba0-bfb1ea32b50a",
"pref_label": {
"en": "University of Jyväskylä",
"fi": "Jyväskylän yliopisto",
"sv": "Jyväskylä universitet",
"und": "Jyväskylän yliopisto"
},
"url": "http://uri.suomi.fi/codelist/fairdata/organization/code/01906",
"in_scheme": "http://uri.suomi.fi/codelist/fairdata/organization"
}
},
{
"id": "b5317b81-5dd1-4cb0-8df6-17aeed0c4af6",
"roles": [
"curator"
],
"person": {
"id": "0d701595-f9da-4a11-a379-70149233765b",
"name": "User support at CSC - IT Center for Science Ltd. The Language Bank of Finland",
"email": "<hidden>"
},
"organization": {
"id": "477dfdb6-563e-41d9-badc-ec3975f79256",
"pref_label": {
"en": "CSC – IT Center for Science",
"fi": "CSC - Tieteen tietotekniikan keskus Oy",
"sv": "CSC – IT Center for Science",
"und": "CSC - Tieteen tietotekniikan keskus Oy"
},
"url": "http://uri.suomi.fi/codelist/fairdata/organization/code/09206320",
"in_scheme": "http://uri.suomi.fi/codelist/fairdata/organization"
}
},
{
"id": "aaf593ee-68c1-45af-bb83-8f3f97709cc7",
"roles": [
"curator"
],
"person": {
"id": "ab01fe2b-0d8a-4187-a9e1-ce153b951228",
"name": "Terho Joutsen",
"email": "<hidden>"
},
"organization": {
"id": "bdd6cb24-71fe-40e0-9ba0-bfb1ea32b50a",
"pref_label": {
"en": "University of Jyväskylä",
"fi": "Jyväskylän yliopisto",
"sv": "Jyväskylä universitet",
"und": "Jyväskylän yliopisto"
},
"url": "http://uri.suomi.fi/codelist/fairdata/organization/code/01906",
"in_scheme": "http://uri.suomi.fi/codelist/fairdata/organization"
}
},
{
"id": "a4c40880-7c92-4b66-b5d3-e499c4d771ac",
"roles": [
"publisher"
],
"organization": {
"id": "eac20fd9-c416-494f-a06e-189d8b90c93c",
"pref_label": {
"en": "Multiple publishers, check distribution rights holders in original metadata by following its persistent identifier"
}
}
}
],
"cumulative_state": 0,
"data_catalog": "urn:nbn:fi:att:data-catalog-harvest-kielipankki",
"description": {
"en": "The corpus is available in Kielipankki - the Language Bank of Finland (puhti.csc.fi, access rights instructions: https://www.kielipankki.fi/support/access/) \n\nLicence: https://www.kielipankki.fi/lic/kra/\n\nA digitized corpus for the study of the lexis and syntax of Middle French and for text editions.\n\nThe corpus consists of 14 documents and 430 000 words. The corpus contains prose, novels, plays and lyrical poetry. It can be used to study the Middle French vocabulary and syntax. \n\nLicense information: http://urn.fi/urn:nbn:fi:lb-20150304134\n\nMore information on the corpus:\nhttp://urn.fi/urn:nbn:fi:lb-201406034\n\nThe purpose of the resource use must be outlined in a research plan."
},
"field_of_science": [
{
"id": "f7ee5ba6-f6af-48d0-bb0f-ea26851256d2",
"url": "http://www.yso.fi/onto/okm-tieteenala/ta6121",
"in_scheme": "http://www.yso.fi/onto/okm-tieteenala/conceptscheme",
"pref_label": {
"en": "Languages",
"fi": "Kielitieteet",
"sv": "Språkvetenskaper"
}
}
],
"infrastructure": [],
"keyword": [],
"language": [
{
"id": "0974b041-177a-48b6-b3af-0f68e212d27e",
"url": "http://lexvo.org/id/iso639-3/frm",
"in_scheme": "http://lexvo.org/id/",
"pref_label": {
"en": "Middle French",
"fi": "keskiranska",
"sv": "medelfranska"
}
}
],
"metadata_owner": {
"id": "003e43bb-cfd5-43a5-92c8-1a6cbec7f76c",
"organization": "service_kielipankki"
},
"other_identifiers": [],
"persistent_identifier": "urn:nbn:fi:lb-201403264",
"pid_generated_by_fairdata": false,
"projects": [],
"provenance": [],
"relation": [],
"remote_resources": [],
"spatial": [],
"state": "published",
"temporal": [],
"theme": [],
"title": {
"en": "Jyväskylä Corpus of Middle French",
"fi": "Keskiranskan korpus"
},
"created": "2023-04-07T00:00:00Z",
"modified": "2024-12-04T12:51:48Z",
"dataset_versions": [
{
"id": "143c69e4-9347-45bb-926d-f16faf47768d",
"title": {
"en": "Jyväskylä Corpus of Middle French",
"fi": "Keskiranskan korpus"
},
"persistent_identifier": "urn:nbn:fi:lb-201403264",
"state": "published",
"created": "2023-04-07T00:00:00Z",
"version": 1
}
],
"published_revision": 16,
"version": 1,
"api_version": 3,
"metadata_repository": "Fairdata"
},
{
"id": "29f8621f-7fd4-4051-8eb8-8cde86352606",
"access_rights": {
"id": "9e97f9d9-c7f0-4f53-8645-415149ec7069",
"license": [
{
"id": "cef12353-9b39-4c92-8a0c-8de977ac8d28",
"custom_url": "http://urn.fi/urn:nbn:fi:lb-2019121001",
"url": "http://uri.suomi.fi/codelist/fairdata/license/code/ClarinACA+NC-1.0",
"in_scheme": "http://uri.suomi.fi/codelist/fairdata/license",
"pref_label": {
"en": "CLARIN ACA+NC (Academic, Non-Commercial) End-User License 1.0"
}
}
],
"access_type": {
"id": "6311561d-0b53-460b-9fb9-1852ce0761eb",
"url": "http://uri.suomi.fi/codelist/fairdata/access_type/code/restricted",
"in_scheme": "http://uri.suomi.fi/codelist/fairdata/access_type",
"pref_label": {
"en": "Restricted use",
"fi": "Saatavuutta rajoitettu"
}
},
"restriction_grounds": [
{
"id": "d0da98cd-9cd2-4352-b12a-3e3efcb585fd",
"url": "http://uri.suomi.fi/codelist/fairdata/restriction_grounds/code/research",
"in_scheme": "http://uri.suomi.fi/codelist/fairdata/restriction_grounds",
"pref_label": {
"en": "Restriced access for research based on contract",
"fi": "Saatavuutta rajoitettu sopimuksen perusteella vain tutkimuskäyttöön",
"sv": "Begränsad åtkomst på bas av kontrakt ändast för forskningsändamål"
}
}
]
},
"actors": [
{
"id": "1bd055ec-96c7-4ff4-8a44-8b1dee559dd1",
"roles": [
"creator",
"curator"
],
"person": {
"id": "c46b0104-a12b-45b2-9686-155cd63a972a",
"name": "Anna Dmitrieva",
"email": "<hidden>"
},
"organization": {
"id": "a5f4935c-4c0e-44f6-a7d6-71b946913649",
"pref_label": {
"en": "University of Helsinki",
"fi": "Helsingin yliopisto",
"sv": "Helsingfors universitet",
"und": "Helsingin yliopisto"
},
"url": "http://uri.suomi.fi/codelist/fairdata/organization/code/01901",
"in_scheme": "http://uri.suomi.fi/codelist/fairdata/organization"
}
},
{
"id": "11265d13-1869-45a2-b77d-30949011f810",
"roles": [
"creator",
"rights_holder"
],
"organization": {
"id": "0dcc95ab-5795-4014-af70-29d526b0b396",
"pref_label": {
"en": "Finnish Broadcasting Company (Yle)",
"fi": "Yleisradio Oy"
},
"homepage": {
"url": "https://yle.fi/"
},
"email": "<hidden>"
}
},
{
"id": "9770abe1-d6ae-4fc9-b5ca-71c5d039ee41",
"roles": [
"publisher"
],
"organization": {
"id": "a5f4935c-4c0e-44f6-a7d6-71b946913649",
"pref_label": {
"en": "University of Helsinki",
"fi": "Helsingin yliopisto",
"sv": "Helsingfors universitet",
"und": "Helsingin yliopisto"
},
"url": "http://uri.suomi.fi/codelist/fairdata/organization/code/01901",
"in_scheme": "http://uri.suomi.fi/codelist/fairdata/organization"
}
}
],
"cumulative_state": 0,
"data_catalog": "urn:nbn:fi:att:data-catalog-harvest-kielipankki",
"description": {
"en": "This resource will be available via Korp in Kielipankki – the Language Bank of Finland.\n\nThis is a parallel corpus created of the Yle news articles from 2014-2020 by aligning the standard Finnish versions with the easy-language versions. The dataset, created by Anna Dmitrieva and available in CSV format, is aligned on the sentence level. It is based on the two parallel document-level datasets of Yle News articles available on Kielipankki (http://urn.fi/urn:nbn:fi:lb-2022111625 and http://urn.fi/urn:nbn:fi:lb-2024011701) The dataset spans the period from September 2014 to December 2020.\n\nThis dataset is comprised of the following parts:\n1) Sentence alignments: parallel documents from regular and Easy Finnish Yle news articles aligned sentence-by-sentence. Only the \"positive\" documents were taken from the 2019-2020 dataset (http://urn.fi/urn:nbn:fi:lb-2022111625) All but 50 documents were aligned automatically with Vecalign (https://github.com/thompsonb/vecalign) using LASER embeddings (https://github.com/facebookresearch/LASER) Each document has the following columns:\n1.1) pair_id: an id comprised of three parts divided by a double underscore: the id of the regular document, the id of the Easy Finnish document (with a singular underscore), and the sentence pair number.\n1.2) regular_string: a sentence from the regular Finnish article.\n1.3) selko_string: a corresponding sentence from the Easy Finnish article.\n1.4) score: the confidence score given by Vecalign. The lower the score, the more similar the sentences. The \"good\" pairs are estimated to have a score below or equal to 0.65; however, the score is not definitive proof of whether the sentences in the pair truly match in meaning. The zero score is assigned when a sentence has no pair. The scores for all non-zero sentence pairs in manually aligned documents are set to 0.(3).\n2) Golden sentence alignments: 50 documents aligned manually by a human assessor (text). 
Also available in the ladder format (indexes)."
},
"field_of_science": [
{
"id": "f7ee5ba6-f6af-48d0-bb0f-ea26851256d2",
"url": "http://www.yso.fi/onto/okm-tieteenala/ta6121",
"in_scheme": "http://www.yso.fi/onto/okm-tieteenala/conceptscheme",
"pref_label": {
"en": "Languages",
"fi": "Kielitieteet",
"sv": "Språkvetenskaper"
}
}
],
"infrastructure": [],
"keyword": [],
"language": [
{
"id": "b0c7eada-5b22-48b8-86c2-16e996ca2681",
"url": "http://lexvo.org/id/iso639-3/fin",
"in_scheme": "http://lexvo.org/id/",
"pref_label": {
"en": "Finnish",
"fi": "suomi",
"sv": "finska"
}
}
],
"metadata_owner": {
"id": "003e43bb-cfd5-43a5-92c8-1a6cbec7f76c",
"organization": "service_kielipankki"
},
"other_identifiers": [],
"persistent_identifier": "urn:nbn:fi:lb-2024031301",
"pid_generated_by_fairdata": false,
"projects": [],
"provenance": [],
"relation": [],
"remote_resources": [],
"spatial": [],
"state": "published",
"temporal": [],
"theme": [],
"title": {
"en": "Parallel Sentence Aligned Corpus of Finnish and Easy-to-read Finnish from the Yle News Archive 2014-2020, Korp",
"fi": "Lausetasolla kohdistettu suomi–selkosuomi-rinnakkaiskorpus Ylen suomenkielisestä uutisarkistosta 2014-2020, Korp"
},
"created": "2024-03-27T00:00:00Z",
"modified": "2024-06-19T08:32:26Z",
"dataset_versions": [
{
"id": "29f8621f-7fd4-4051-8eb8-8cde86352606",
"title": {
"en": "Parallel Sentence Aligned Corpus of Finnish and Easy-to-read Finnish from the Yle News Archive 2014-2020, Korp",
"fi": "Lausetasolla kohdistettu suomi–selkosuomi-rinnakkaiskorpus Ylen suomenkielisestä uutisarkistosta 2014-2020, Korp"
},
"persistent_identifier": "urn:nbn:fi:lb-2024031301",
"state": "published",
"created": "2024-03-27T00:00:00Z",
"version": 1
}
],
"published_revision": 13,
"version": 1,
"api_version": 3,
"metadata_repository": "Fairdata"
},
{
"id": "c39baf38-167a-4924-9088-42b098e47f5e",
"access_rights": {
"id": "f2444709-3619-45b5-aaec-b77045662ee0",
"license": [
{
"id": "cef12353-9b39-4c92-8a0c-8de977ac8d28",
"custom_url": "http://urn.fi/urn:nbn:fi:lb-2022050901",
"url": "http://uri.suomi.fi/codelist/fairdata/license/code/ClarinACA+NC-1.0",
"in_scheme": "http://uri.suomi.fi/codelist/fairdata/license",
"pref_label": {
"en": "CLARIN ACA+NC (Academic, Non-Commercial) End-User License 1.0"
}
}
],
"access_type": {
"id": "6311561d-0b53-460b-9fb9-1852ce0761eb",
"url": "http://uri.suomi.fi/codelist/fairdata/access_type/code/restricted",
"in_scheme": "http://uri.suomi.fi/codelist/fairdata/access_type",
"pref_label": {
"en": "Restricted use",
"fi": "Saatavuutta rajoitettu"
}
},
"restriction_grounds": [
{
"id": "d0da98cd-9cd2-4352-b12a-3e3efcb585fd",
"url": "http://uri.suomi.fi/codelist/fairdata/restriction_grounds/code/research",
"in_scheme": "http://uri.suomi.fi/codelist/fairdata/restriction_grounds",
"pref_label": {
"en": "Restriced access for research based on contract",
"fi": "Saatavuutta rajoitettu sopimuksen perusteella vain tutkimuskäyttöön",
"sv": "Begränsad åtkomst på bas av kontrakt ändast för forskningsändamål"
}
}
]
},
"actors": [
{
"id": "a2c52220-3674-4dc9-9506-3a5673fe79e9",
"roles": [
"creator",
"curator"
],
"person": {
"id": "96f9aea5-f8c2-4041-8586-1b0766052fbf",
"name": "Anna Dmitrieva",
"email": "<hidden>"
},
"organization": {
"id": "a5f4935c-4c0e-44f6-a7d6-71b946913649",
"pref_label": {
"en": "University of Helsinki",
"fi": "Helsingin yliopisto",
"sv": "Helsingfors universitet",
"und": "Helsingin yliopisto"
},
"url": "http://uri.suomi.fi/codelist/fairdata/organization/code/01901",
"in_scheme": "http://uri.suomi.fi/codelist/fairdata/organization"
}
},
{
"id": "2100680b-2823-41fa-b4ac-98edba39627a",
"roles": [
"creator",
"rights_holder"
],
"organization": {
"id": "f4e023be-53b8-4eb7-b502-50958280aa5c",
"pref_label": {
"en": "Finnish Broadcasting Company (Yle)",
"fi": "Yleisradio Oy"
},
"homepage": {
"url": "https://yle.fi/"
},
"email": "<hidden>"
}
},
{
"id": "863ca0b1-bd7a-4fe9-9c7b-c6e24dc613e2",
"roles": [
"publisher"
],
"organization": {
"id": "a5f4935c-4c0e-44f6-a7d6-71b946913649",
"pref_label": {
"en": "University of Helsinki",
"fi": "Helsingin yliopisto",
"sv": "Helsingfors universitet",
"und": "Helsingin yliopisto"
},
"url": "http://uri.suomi.fi/codelist/fairdata/organization/code/01901",
"in_scheme": "http://uri.suomi.fi/codelist/fairdata/organization"
}
}
],
"cumulative_state": 0,
"data_catalog": "urn:nbn:fi:att:data-catalog-harvest-kielipankki",
"description": {
"en": "This resource is available for download in Kielipankki – the Language Bank of Finland.\n\nThis is a parallel corpus created of the Yle news articles from 2014-2018 by aligning the standard Finnish versions with the easy-language versions. The dataset, created by Anna Dmitrieva and available in CSV format, is aligned on the document level. The news articles were obtained from the datasets available via Kielipankki (http://urn.fi/urn:nbn:fi:lb-2017070501 and http://urn.fi/urn:nbn:fi:lb-2019050901).\n\nThis dataset extends the previously published Parallel Corpus of Finnish and Easy-to-read Finnish from the Yle News Archive 2019-2020 (http://urn.fi/urn:nbn:fi:lb-2022111625) Please note that this dataset has not been assessed by a human expert. The articles have been aligned automatically with the Vecalign document alignment algorithm (https://github.com/thompsonb/vecalign) without candidate rescoring, using LASER embeddings (https://github.com/facebookresearch/LASER).\n\nDescription of all columns in the dataset:\n-index_in_selko: This index consists of two parts divided by an underscore. The first (longer) part identifies the entire Easy Finnish article from the original dataset. The second (shorter) part is the number of the paragraph. Since the Yle Selkosuomi articles usually consist of multiple paragraphs, each paragraph describing a separate piece of news, we represent each paragraph as an individual little article in our dataset. Paragraph numbering starts with 0.\n- index_in_regular: The identifier of the regular Finnish article taken from the original dataset.\n- selko_text: A piece of news in Easy Finnish.\n- regular_text: A corresponding piece of news in regular Finnish.\n- distance: The cosine distance between the document vectors. The lower the distance, the more similar the documents are."
},
"field_of_science": [
{
"id": "f7ee5ba6-f6af-48d0-bb0f-ea26851256d2",
"url": "http://www.yso.fi/onto/okm-tieteenala/ta6121",
"in_scheme": "http://www.yso.fi/onto/okm-tieteenala/conceptscheme",
"pref_label": {
"en": "Languages",
"fi": "Kielitieteet",
"sv": "Språkvetenskaper"
}
}
],
"infrastructure": [],
"keyword": [],
"language": [
{
"id": "b0c7eada-5b22-48b8-86c2-16e996ca2681",
"url": "http://lexvo.org/id/iso639-3/fin",
"in_scheme": "http://lexvo.org/id/",
"pref_label": {
"en": "Finnish",
"fi": "suomi",
"sv": "finska"
}
}
],
"metadata_owner": {
"id": "003e43bb-cfd5-43a5-92c8-1a6cbec7f76c",
"organization": "service_kielipankki"
},
"other_identifiers": [],
"persistent_identifier": "urn:nbn:fi:lb-2024011701",
"pid_generated_by_fairdata": false,
"projects": [],
"provenance": [],
"relation": [],
"remote_resources": [],
"spatial": [],
"state": "published",
"temporal": [],
"theme": [],
"title": {
"en": "Parallel Corpus of Finnish and Easy-to-read Finnish from the Yle News Archive 2014-2018, source",
"fi": "Suomi-selkosuomi-rinnakkaiskorpus Ylen suomenkielisestä uutisarkistosta 2014-2018, lähdeaineisto"
},
"created": "2024-02-02T00:00:00Z",
"modified": "2024-06-19T08:31:00Z",
"dataset_versions": [
{
"id": "c39baf38-167a-4924-9088-42b098e47f5e",
"title": {
"en": "Parallel Corpus of Finnish and Easy-to-read Finnish from the Yle News Archive 2014-2018, source",
"fi": "Suomi-selkosuomi-rinnakkaiskorpus Ylen suomenkielisestä uutisarkistosta 2014-2018, lähdeaineisto"
},
"persistent_identifier": "urn:nbn:fi:lb-2024011701",
"state": "published",
"created": "2024-02-02T00:00:00Z",
"version": 1
}
],
"published_revision": 13,
"version": 1,
"api_version": 3,
"metadata_repository": "Fairdata"
},
{
"id": "74a8ede4-89fb-4036-99d0-9cd2967c70cd",
"access_rights": {
"id": "e9d187f1-1a23-4ee9-9a3b-f432432a8b12",
"license": [
{
"id": "cef12353-9b39-4c92-8a0c-8de977ac8d28",
"custom_url": "http://urn.fi/urn:nbn:fi:lb-2022050901",
"url": "http://uri.suomi.fi/codelist/fairdata/license/code/ClarinACA+NC-1.0",
"in_scheme": "http://uri.suomi.fi/codelist/fairdata/license",
"pref_label": {
"en": "CLARIN ACA+NC (Academic, Non-Commercial) End-User License 1.0"
}
}
],
"access_type": {
"id": "6311561d-0b53-460b-9fb9-1852ce0761eb",
"url": "http://uri.suomi.fi/codelist/fairdata/access_type/code/restricted",
"in_scheme": "http://uri.suomi.fi/codelist/fairdata/access_type",
"pref_label": {
"en": "Restricted use",
"fi": "Saatavuutta rajoitettu"
}
},
"restriction_grounds": [
{
"id": "d0da98cd-9cd2-4352-b12a-3e3efcb585fd",
"url": "http://uri.suomi.fi/codelist/fairdata/restriction_grounds/code/research",
"in_scheme": "http://uri.suomi.fi/codelist/fairdata/restriction_grounds",
"pref_label": {
"en": "Restriced access for research based on contract",
"fi": "Saatavuutta rajoitettu sopimuksen perusteella vain tutkimuskäyttöön",
"sv": "Begränsad åtkomst på bas av kontrakt ändast för forskningsändamål"
}
}
]
},
"actors": [
{
"id": "4c8c751f-5e8a-42d7-b16f-52cc5cd495c5",
"roles": [
"creator",
"curator"
],
"person": {
"id": "b9068701-b790-4f09-b4ef-4134fa03b632",
"name": "Anna Dmitrieva",
"email": "<hidden>"
},
"organization": {
"id": "a5f4935c-4c0e-44f6-a7d6-71b946913649",
"pref_label": {
"en": "University of Helsinki",
"fi": "Helsingin yliopisto",
"sv": "Helsingfors universitet",
"und": "Helsingin yliopisto"
},
"url": "http://uri.suomi.fi/codelist/fairdata/organization/code/01901",
"in_scheme": "http://uri.suomi.fi/codelist/fairdata/organization"
}
},
{
"id": "b194efc6-a0f3-4696-920e-4d08b75dbaef",
"roles": [
"creator",
"rights_holder"
],
"organization": {
"id": "a7aa5c3d-f199-4160-a24f-6ec732b48216",
"pref_label": {
"en": "Finnish Broadcasting Company (Yle)",
"fi": "Yleisradio Oy"
},
"homepage": {
"url": "https://yle.fi/"
},
"email": "<hidden>"
}
},
{
"id": "4921472c-0b67-4748-9031-ac93225c38a6",
"roles": [
"publisher"
],
"organization": {
"id": "a5f4935c-4c0e-44f6-a7d6-71b946913649",
"pref_label": {
"en": "University of Helsinki",
"fi": "Helsingin yliopisto",
"sv": "Helsingfors universitet",
"und": "Helsingin yliopisto"
},
"url": "http://uri.suomi.fi/codelist/fairdata/organization/code/01901",
"in_scheme": "http://uri.suomi.fi/codelist/fairdata/organization"
}
}
],
"cumulative_state": 0,
"data_catalog": "urn:nbn:fi:att:data-catalog-harvest-kielipankki",
"description": {
"en": "This resource is available for download in Kielipankki – the Language Bank of Finland.\n\nThis is a parallel corpus created of the Yle news articles from 2014-2020 by aligning the standard Finnish versions with the easy-language versions. The dataset, created by Anna Dmitrieva and available in CSV format, is aligned on the sentence level. It is based on the two parallel document-level datasets of Yle News articles available on Kielipankki (http://urn.fi/urn:nbn:fi:lb-2022111625 and http://urn.fi/urn:nbn:fi:lb-2024011701) The dataset spans the period from September 2014 to December 2020.\n\nThis dataset is comprised of the following parts:\n1) Sentence alignments: parallel documents from regular and Easy Finnish Yle news articles aligned sentence-by-sentence. Only the \"positive\" documents were taken from the 2019-2020 dataset (http://urn.fi/urn:nbn:fi:lb-2022111625) All but 50 documents were aligned automatically with Vecalign (https://github.com/thompsonb/vecalign) using LASER embeddings (https://github.com/facebookresearch/LASER) Each document has the following columns:\n1.1) pair_id: an id comprised of three parts divided by a double underscore: the id of the regular document, the id of the Easy Finnish document (with a singular underscore), and the sentence pair number.\n1.2) regular_string: a sentence from the regular Finnish article.\n1.3) selko_string: a corresponding sentence from the Easy Finnish article.\n1.4) score: the confidence score given by Vecalign. The lower the score, the more similar the sentences. The \"good\" pairs are estimated to have a score below or equal to 0.65; however, the score is not definitive proof of whether the sentences in the pair truly match in meaning. The zero score is assigned when a sentence has no pair. The scores for all non-zero sentence pairs in manually aligned documents are set to 0.(3).\n2) Golden sentence alignments: 50 documents aligned manually by a human assessor (text). 
Also available in the ladder format (indexes)."
},
"field_of_science": [
{
"id": "f7ee5ba6-f6af-48d0-bb0f-ea26851256d2",
"url": "http://www.yso.fi/onto/okm-tieteenala/ta6121",
"in_scheme": "http://www.yso.fi/onto/okm-tieteenala/conceptscheme",
"pref_label": {
"en": "Languages",
"fi": "Kielitieteet",
"sv": "Språkvetenskaper"
}
}
],
"infrastructure": [],
"keyword": [],
"language": [
{
"id": "b0c7eada-5b22-48b8-86c2-16e996ca2681",
"url": "http://lexvo.org/id/iso639-3/fin",
"in_scheme": "http://lexvo.org/id/",
"pref_label": {
"en": "Finnish",
"fi": "suomi",
"sv": "finska"
}
}
],
"metadata_owner": {
"id": "003e43bb-cfd5-43a5-92c8-1a6cbec7f76c",
"organization": "service_kielipankki"
},
"other_identifiers": [],
"persistent_identifier": "urn:nbn:fi:lb-2024011703",
"pid_generated_by_fairdata": false,
"projects": [],
"provenance": [],
"relation": [],
"remote_resources": [],
"spatial": [],
"state": "published",
"temporal": [],
"theme": [],
"title": {
"en": "Parallel Sentence Aligned Corpus of Finnish and Easy-to-read Finnish from the Yle News Archive 2014-2020, source",
"fi": "Lausetasolla kohdistettu suomi–selkosuomi-rinnakkaiskorpus Ylen suomenkielisestä uutisarkistosta 2014-2020, lähdeaineisto"
},
"created": "2024-02-09T00:00:00Z",
"modified": "2024-06-19T08:30:55Z",
"dataset_versions": [
{
"id": "74a8ede4-89fb-4036-99d0-9cd2967c70cd",
"title": {
"en": "Parallel Sentence Aligned Corpus of Finnish and Easy-to-read Finnish from the Yle News Archive 2014-2020, source",
"fi": "Lausetasolla kohdistettu suomi–selkosuomi-rinnakkaiskorpus Ylen suomenkielisestä uutisarkistosta 2014-2020, lähdeaineisto"
},
"persistent_identifier": "urn:nbn:fi:lb-2024011703",
"state": "published",
"created": "2024-02-09T00:00:00Z",
"version": 1
}
],
"published_revision": 13,
"version": 1,
"api_version": 3,
"metadata_repository": "Fairdata"
},
{
"id": "73b6c56e-13ee-4ac6-949c-4ae546b27f3e",
"access_rights": {
"id": "1ffff6fd-5111-446b-bc5f-703b68a7c311",
"license": [
{
"id": "fc8a7008-cf5e-4df1-9e05-0145d445c80e",
"url": "http://uri.suomi.fi/codelist/fairdata/license/code/ClarinRES-1.0",
"in_scheme": "http://uri.suomi.fi/codelist/fairdata/license",
"pref_label": {
"en": "CLARIN RES (Restricted) End-User License 1.0"
}
},
{
"id": "5a00a777-42a2-41a9-9911-968146f7e53f",
"url": "http://uri.suomi.fi/codelist/fairdata/license/code/other",
"in_scheme": "http://uri.suomi.fi/codelist/fairdata/license",
"pref_label": {
"en": "Other",
"fi": "Muu"
}
}
],
"access_type": {
"id": "6311561d-0b53-460b-9fb9-1852ce0761eb",
"url": "http://uri.suomi.fi/codelist/fairdata/access_type/code/restricted",
"in_scheme": "http://uri.suomi.fi/codelist/fairdata/access_type",
"pref_label": {
"en": "Restricted use",
"fi": "Saatavuutta rajoitettu"
}
},
"restriction_grounds": [
{
"id": "41b4ff93-e15e-4829-8553-49e4ef079d69",
"url": "http://uri.suomi.fi/codelist/fairdata/restriction_grounds/code/other",
"in_scheme": "http://uri.suomi.fi/codelist/fairdata/restriction_grounds",
"pref_label": {
"en": "Restricted access due to other reasons",
"fi": "Saatavuutta rajoitettu muulla perusteella",
"sv": "Begränsad åtkomst av övriga skäl"
}
}
]
},
"actors": [
{
"id": "da6d4157-741c-4e00-96aa-4c983a2941ba",
"roles": [
"creator",
"curator"
],
"person": {
"id": "e153d443-2054-4dc8-ad38-0b73b30133b9",
"name": "Anssi Moisio",
"email": "<hidden>"
},
"organization": {
"id": "2f9f9011-4061-4a93-96af-768471abada7",
"pref_label": {
"en": "Aalto University",
"fi": "Aalto-yliopisto",
"sv": "Aalto-universitetet",
"und": "Aalto-yliopisto"
},
"url": "http://uri.suomi.fi/codelist/fairdata/organization/code/10076",
"in_scheme": "http://uri.suomi.fi/codelist/fairdata/organization"
}
},
{
"id": "9fbbc378-8a7a-41b5-8f32-c389bfe39e4c",
"roles": [
"publisher"
],
"organization": {
"id": "a5f4935c-4c0e-44f6-a7d6-71b946913649",
"pref_label": {
"en": "University of Helsinki",
"fi": "Helsingin yliopisto",
"sv": "Helsingfors universitet",
"und": "Helsingin yliopisto"
},
"url": "http://uri.suomi.fi/codelist/fairdata/organization/code/01901",
"in_scheme": "http://uri.suomi.fi/codelist/fairdata/organization"
}
}
],
"cumulative_state": 0,
"data_catalog": "urn:nbn:fi:att:data-catalog-harvest-kielipankki",
"description": {
"en": "This resource is available for download in Kielipankki - The Language Bank of Finland as part of \"Donate Speech: Selected dataset\", http://urn.fi/urn:nbn:fi:lb-2022060127.\n\nThe resource contains a 10-hour subset of speech from the Donate Speech Corpus. This set includes the smaller set puhelahjat-test-mtr, where each recording was transcribed by four different transcribers, but the set was extended by including all recordings by the same 57 speakers (according to the metadata accompanying the original recordings). The multi-transcriber data was used for testing an ASR system at Aalto University.\n\nFor speech technology development purposes, this multi-transcriber speaker dataset can be used together with the smaller puhelahjat-test-mtr set."
},
"field_of_science": [
{
"id": "f7ee5ba6-f6af-48d0-bb0f-ea26851256d2",
"url": "http://www.yso.fi/onto/okm-tieteenala/ta6121",
"in_scheme": "http://www.yso.fi/onto/okm-tieteenala/conceptscheme",
"pref_label": {
"en": "Languages",
"fi": "Kielitieteet",
"sv": "Språkvetenskaper"
}
}
],
"infrastructure": [],
"keyword": [],
"language": [
{
"id": "b0c7eada-5b22-48b8-86c2-16e996ca2681",
"url": "http://lexvo.org/id/iso639-3/fin",
"in_scheme": "http://lexvo.org/id/",
"pref_label": {
"en": "Finnish",
"fi": "suomi",
"sv": "finska"
}
}
],
"metadata_owner": {
"id": "003e43bb-cfd5-43a5-92c8-1a6cbec7f76c",
"organization": "service_kielipankki"
},
"other_identifiers": [],
"persistent_identifier": "urn:nbn:fi:lb-2022060125",
"pid_generated_by_fairdata": false,
"projects": [],
"provenance": [],
"relation": [],
"remote_resources": [],
"spatial": [],
"state": "published",
"temporal": [],
"theme": [],
"title": {
"en": "Donate Speech Corpus: Test data from multi-transcriber speakers (10h)",
"fi": "Lahjoita puhetta -aineisto: Testidata useaan kertaan litteroiduilta puhujilta (10h)"
},
"created": "2023-04-21T00:00:00Z",
"modified": "2024-06-19T08:26:06Z",
"dataset_versions": [
{
"id": "73b6c56e-13ee-4ac6-949c-4ae546b27f3e",
"title": {
"en": "Donate Speech Corpus: Test data from multi-transcriber speakers (10h)",
"fi": "Lahjoita puhetta -aineisto: Testidata useaan kertaan litteroiduilta puhujilta (10h)"
},
"persistent_identifier": "urn:nbn:fi:lb-2022060125",
"state": "published",
"created": "2023-04-21T00:00:00Z",
"version": 1
}
],
"published_revision": 14,
"version": 1,
"api_version": 3,
"metadata_repository": "Fairdata"
},
{
"id": "20c70fe8-07d5-47b6-a823-d3576644facf",
"access_rights": {
"id": "4d207f2c-999a-4127-a75d-fcfc1b5399b7",
"license": [
{
"id": "fc8a7008-cf5e-4df1-9e05-0145d445c80e",
"custom_url": "http://urn.fi/urn:nbn:fi:lb-2022062222",
"url": "http://uri.suomi.fi/codelist/fairdata/license/code/ClarinRES-1.0",
"in_scheme": "http://uri.suomi.fi/codelist/fairdata/license",
"pref_label": {
"en": "CLARIN RES (Restricted) End-User License 1.0"
}
}
],
"access_type": {
"id": "6311561d-0b53-460b-9fb9-1852ce0761eb",
"url": "http://uri.suomi.fi/codelist/fairdata/access_type/code/restricted",
"in_scheme": "http://uri.suomi.fi/codelist/fairdata/access_type",
"pref_label": {
"en": "Restricted use",
"fi": "Saatavuutta rajoitettu"
}
},
"restriction_grounds": [
{
"id": "41b4ff93-e15e-4829-8553-49e4ef079d69",
"url": "http://uri.suomi.fi/codelist/fairdata/restriction_grounds/code/other",
"in_scheme": "http://uri.suomi.fi/codelist/fairdata/restriction_grounds",
"pref_label": {
"en": "Restricted access due to other reasons",
"fi": "Saatavuutta rajoitettu muulla perusteella",
"sv": "Begränsad åtkomst av övriga skäl"
}
}
]
},
"actors": [
{
"id": "0e9763b6-a8ee-4ff6-904b-5e389a13b271",
"roles": [
"creator",
"curator"
],
"person": {
"id": "a25f6e2f-6bfd-4a71-bec2-799293abef84",
"name": "Tuomas Harviainen",
"email": "<hidden>"
},
"organization": {
"id": "755e6950-1e73-42ac-aa83-70b99783fad0",
"pref_label": {
"en": "Tampere University",
"fi": "Tampereen yliopisto"
},
"homepage": {
"url": "https://ror.org/033003e23"
},
"email": "<hidden>"
}
},
{
"id": "c103a96d-e0f1-4211-bf09-effaea024141",
"roles": [
"publisher",
"rights_holder"
],
"organization": {
"id": "755e6950-1e73-42ac-aa83-70b99783fad0",
"pref_label": {
"en": "Tampere University",
"fi": "Tampereen yliopisto"
},
"homepage": {
"url": "https://ror.org/033003e23"
},
"email": "<hidden>"
}
}
],
"cumulative_state": 0,
"data_catalog": "urn:nbn:fi:att:data-catalog-harvest-kielipankki",
"description": {
"en": "The resource is available for restricted use via Kielipankki - the Language Bank of Finland. Instructions on applying for access are available on the resource group page (see Documentation).\n\nThis Finnish dataset consists of 3 104 515 messages posted on the Torilauta discussion board operating in the dark web in the years 2017-2020. The data were collected and submitted by the site administrator in order to be archived for research use. The data set was received by the ENNCODE project at the University of Tampere. \n\nIn addition to the message title and text, the posts contain the following metadata: time stamps of sending and deletion, sender's nickname, subject area, and the message and thread identifiers. The data was provided as a JSONLINES text file, each line of which corresponds to one message and its metadata in JSON format. Individual messages have been removed from the data for data protection reasons.",
"fi": "Tämä aineisto on saatavilla rajoitettuun käyttöön Kielipankin kautta. Ohjeet käyttöoikeuksien hakemiseen löytyvät aineistoryhmän sivulta (ks. Documentation).\n\nTämä suomenkielinen aineisto koostuu 3 104 515 viestistä, jotka on lähetetty pimeässä verkossa toimineelle Torilauta-keskustelupalstalle vuosina 2017–2020. Aineiston keräsi ja luovutti arkistoitavaksi tutkimuskäyttöä varten sivuston ylläpitäjä. Aineiston vastaanotti Tampereen yliopiston ENNCODE-hanke. \n\nViestit sisältävät otsikon ja tekstin lisäksi seuraavat metatiedot: lähetys- ja poistoaikaleima, lähettäjän nimimerkki, aihealue sekä viesti- ja ketjutunniste. Aineisto on toimitettu JSONLINES-tekstitiedostona, jonka jokainen rivi vastaa yhtä viestiä metatietoineen JSON-muodossa. Tietosuojasyistä aineistosta on poistettu henkilö- ja tunnistetietoja ja kokonaisia viestejä."
},
"field_of_science": [
{
"id": "f7ee5ba6-f6af-48d0-bb0f-ea26851256d2",
"url": "http://www.yso.fi/onto/okm-tieteenala/ta6121",
"in_scheme": "http://www.yso.fi/onto/okm-tieteenala/conceptscheme",
"pref_label": {
"en": "Languages",
"fi": "Kielitieteet",
"sv": "Språkvetenskaper"
}
}
],
"infrastructure": [],
"keyword": [],
"language": [
{
"id": "b0c7eada-5b22-48b8-86c2-16e996ca2681",
"url": "http://lexvo.org/id/iso639-3/fin",
"in_scheme": "http://lexvo.org/id/",
"pref_label": {
"en": "Finnish",
"fi": "suomi",
"sv": "finska"
}
}
],
"metadata_owner": {
"id": "003e43bb-cfd5-43a5-92c8-1a6cbec7f76c",
"organization": "service_kielipankki"
},
"other_identifiers": [],
"persistent_identifier": "urn:nbn:fi:lb-2022062221",
"pid_generated_by_fairdata": false,
"projects": [],
"provenance": [],
"relation": [],
"remote_resources": [],
"spatial": [],
"state": "published",
"temporal": [],
"theme": [],
"title": {
"en": "Finnish Dark Web Marketplace Corpus",
"fi": "Suomenkielisen pimeän verkon kauppapaikka-aineisto"
},
"created": "2024-04-03T00:00:00Z",
"modified": "2024-06-19T08:24:56Z",
"dataset_versions": [
{
"id": "20c70fe8-07d5-47b6-a823-d3576644facf",
"title": {
"en": "Finnish Dark Web Marketplace Corpus",
"fi": "Suomenkielisen pimeän verkon kauppapaikka-aineisto"
},
"persistent_identifier": "urn:nbn:fi:lb-2022062221",
"state": "published",
"created": "2024-04-03T00:00:00Z",
"version": 1
}
],
"published_revision": 14,
"version": 1,
"api_version": 3,
"metadata_repository": "Fairdata"
},
{
"id": "6692df1f-1f0d-4f43-b713-e3e3b36b78ff",
"access_rights": {
"id": "6f6cc8cb-9d01-40e0-8117-950f10624194",
"license": [
{
"id": "fc8a7008-cf5e-4df1-9e05-0145d445c80e",
"url": "http://uri.suomi.fi/codelist/fairdata/license/code/ClarinRES-1.0",
"in_scheme": "http://uri.suomi.fi/codelist/fairdata/license",
"pref_label": {
"en": "CLARIN RES (Restricted) End-User License 1.0"
}
},
{
"id": "5a00a777-42a2-41a9-9911-968146f7e53f",
"url": "http://uri.suomi.fi/codelist/fairdata/license/code/other",
"in_scheme": "http://uri.suomi.fi/codelist/fairdata/license",
"pref_label": {
"en": "Other",
"fi": "Muu"
}
}
],
"access_type": {
"id": "6311561d-0b53-460b-9fb9-1852ce0761eb",
"url": "http://uri.suomi.fi/codelist/fairdata/access_type/code/restricted",
"in_scheme": "http://uri.suomi.fi/codelist/fairdata/access_type",
"pref_label": {
"en": "Restricted use",
"fi": "Saatavuutta rajoitettu"
}
},
"restriction_grounds": [
{
"id": "41b4ff93-e15e-4829-8553-49e4ef079d69",
"url": "http://uri.suomi.fi/codelist/fairdata/restriction_grounds/code/other",
"in_scheme": "http://uri.suomi.fi/codelist/fairdata/restriction_grounds",
"pref_label": {
"en": "Restricted access due to other reasons",
"fi": "Saatavuutta rajoitettu muulla perusteella",
"sv": "Begränsad åtkomst av övriga skäl"
}
}
]
},
"actors": [
{
"id": "adb3e890-3592-4053-920f-0827d606f789",
"roles": [
"creator",
"curator"
],
"person": {
"id": "bb778074-de00-4152-bafb-06963597a2a3",
"name": "Anssi Moisio",
"email": "<hidden>"
},
"organization": {
"id": "2f9f9011-4061-4a93-96af-768471abada7",
"pref_label": {
"en": "Aalto University",
"fi": "Aalto-yliopisto",
"sv": "Aalto-universitetet",
"und": "Aalto-yliopisto"
},
"url": "http://uri.suomi.fi/codelist/fairdata/organization/code/10076",
"in_scheme": "http://uri.suomi.fi/codelist/fairdata/organization"
}
},
{
"id": "98a7ef5c-43c3-44e9-927e-627fc49a9676",
"roles": [
"publisher"
],
"organization": {
"id": "a5f4935c-4c0e-44f6-a7d6-71b946913649",
"pref_label": {
"en": "University of Helsinki",
"fi": "Helsingin yliopisto",
"sv": "Helsingfors universitet",
"und": "Helsingin yliopisto"
},
"url": "http://uri.suomi.fi/codelist/fairdata/organization/code/01901",
"in_scheme": "http://uri.suomi.fi/codelist/fairdata/organization"
}
}
],
"cumulative_state": 0,
"data_catalog": "urn:nbn:fi:att:data-catalog-harvest-kielipankki",
"description": {
"en": "This resource is available for download in Kielipankki - The Language Bank of Finland as part of \"Donate Speech: Selected dataset\", http://urn.fi/urn:nbn:fi:lb-2022060127.\n\nThe resource contains a subset of 10 hours of transcribed speech that was selected from the Donate Speech Corpus and used for testing an ASR system at Aalto University.\n\nThe test data includes at least ten minutes of speech for each metadata class in each of the five metadata domains (age, dialect, gender, native/non-native and theme). The set contains speech from 103 different speakers (according to the metadata accompanying the original recordings). The gender ratio has been debiased, so that the set includes over 40% male speakers (similarly to the puhelahjat-dev set, while the puhelahjat-train set has just over 20% of male speakers). \n\nFor speech technology development purposes, the test dataset can be used together with the puhelahjat-dev and puhelahjat-train datasets. There is no overlap of speakers between these three sets."
},
"field_of_science": [
{
"id": "f7ee5ba6-f6af-48d0-bb0f-ea26851256d2",
"url": "http://www.yso.fi/onto/okm-tieteenala/ta6121",
"in_scheme": "http://www.yso.fi/onto/okm-tieteenala/conceptscheme",
"pref_label": {
"en": "Languages",
"fi": "Kielitieteet",
"sv": "Språkvetenskaper"
}
}
],
"infrastructure": [],
"keyword": [],
"language": [
{
"id": "b0c7eada-5b22-48b8-86c2-16e996ca2681",
"url": "http://lexvo.org/id/iso639-3/fin",
"in_scheme": "http://lexvo.org/id/",
"pref_label": {
"en": "Finnish",
"fi": "suomi",
"sv": "finska"
}
}
],
"metadata_owner": {
"id": "003e43bb-cfd5-43a5-92c8-1a6cbec7f76c",
"organization": "service_kielipankki"
},
"other_identifiers": [],
"persistent_identifier": "urn:nbn:fi:lb-2022060122",
"pid_generated_by_fairdata": false,
"projects": [],
"provenance": [],
"relation": [],
"remote_resources": [],
"spatial": [],
"state": "published",
"temporal": [],
"theme": [],
"title": {
"en": "Donate Speech Corpus: Test data (10h)",
"fi": "Lahjoita puhetta -aineisto: Testidata (10h)"
},
"created": "2023-04-21T00:00:00Z",
"modified": "2024-06-19T08:24:24Z",
"dataset_versions": [
{
"id": "6692df1f-1f0d-4f43-b713-e3e3b36b78ff",
"title": {
"en": "Donate Speech Corpus: Test data (10h)",
"fi": "Lahjoita puhetta -aineisto: Testidata (10h)"
},
"persistent_identifier": "urn:nbn:fi:lb-2022060122",
"state": "published",
"created": "2023-04-21T00:00:00Z",
"version": 1
}
],
"published_revision": 14,
"version": 1,
"api_version": 3,
"metadata_repository": "Fairdata"
},
{
"id": "212653f4-5a1a-4443-bbe5-7c48c7db799c",
"access_rights": {
"id": "2ee56bf8-c6fc-48ef-b866-ed875e9fcbc7",
"license": [
{
"id": "edc3e7d1-0e14-490e-a7dd-6ace3dcb3673",
"custom_url": "http://urn.fi/urn:nbn:fi:lb-2022041923",
"url": "http://uri.suomi.fi/codelist/fairdata/license/code/CC0-1.0",
"in_scheme": "http://uri.suomi.fi/codelist/fairdata/license",
"pref_label": {
"en": "Creative Commons CC0 1.0 Universal (CC0 1.0) Public Domain Dedication",
"fi": "Creative Commons Yleismaailmallinen (CC0 1.0) Public Domain -lausuma"
}
}
],
"access_type": {
"id": "b41462f7-00bf-4e50-935b-2bf1184453a7",
"url": "http://uri.suomi.fi/codelist/fairdata/access_type/code/open",
"in_scheme": "http://uri.suomi.fi/codelist/fairdata/access_type",
"pref_label": {
"en": "Open",
"fi": "Avoin"
}
},
"restriction_grounds": []
},
"actors": [
{
"id": "17ac405b-5f80-4922-8a73-f750900a2f49",
"roles": [
"creator",
"curator",
"rights_holder"
],
"person": {
"id": "62975232-da4c-4482-b0fe-e916179b8d2d",
"name": "Frankie R. Robertson",
"email": "<hidden>"
},
"organization": {
"id": "bdd6cb24-71fe-40e0-9ba0-bfb1ea32b50a",
"pref_label": {
"en": "University of Jyväskylä",
"fi": "Jyväskylän yliopisto",
"sv": "Jyväskylä universitet",
"und": "Jyväskylän yliopisto"
},
"url": "http://uri.suomi.fi/codelist/fairdata/organization/code/01906",
"in_scheme": "http://uri.suomi.fi/codelist/fairdata/organization"
}
},
{
"id": "99d21861-6d47-44b0-8a6d-196031697c26",
"roles": [
"publisher"
],
"organization": {
"id": "a5f4935c-4c0e-44f6-a7d6-71b946913649",
"pref_label": {
"en": "University of Helsinki",
"fi": "Helsingin yliopisto",
"sv": "Helsingfors universitet",
"und": "Helsingin yliopisto"
},
"url": "http://uri.suomi.fi/codelist/fairdata/organization/code/01901",
"in_scheme": "http://uri.suomi.fi/codelist/fairdata/organization"
}
}
],
"cumulative_state": 0,
"data_catalog": "urn:nbn:fi:att:data-catalog-harvest-kielipankki",
"description": {
"en": "The TallVocabL2Fi dataset comprises of responses from 15 participants to a \"tall\" 12000 word 5-point scale self-rating response task and a 100 word confirmatory word translation response task. The 15 participants were split by native language, 5 English, 4 Hungarian and 6 Russian, and self-reported CEFR reading level, 5 B1, 4 B2, 5 C1 and 2 C2. The data was gathered through a website from paid participants resident in Finland over a period of 3 months from September and November 2021. In total there are 180 thousand word knowledge self-rating responses and 1.5 thousand word translation responses.\n\nThe dataset is unique in its combination of the tall data collection set up, where responses are collected for many words, the varied backgrounds of the learners, the use of Finnish prompt words, and the triangulation with a word translation test. The dataset can be used for vocabulary acquisition research in general, but it is particularly suited to evaluation of the task of Vocabulary Inventory Prediction (VIP) including techniques based on Computer-Adaptive Testing (CAT).\n\nThe dataset is relational/tabular. It is distributed as a series of TSV files along with a SQL schema exported from DuckDB. \n\nThe TallVocabL2Fi dataset is available for download via Kielipankki – The Language Bank of Finland.\n\nFurther information about the schema and the collection process is available in the readme included with the data, and in the accompanying publication: \n\nRobertson, F., Chang & L., Söyrinki, S. (2022). TallVocabL2Fi: An Extensive Mapping of 15 Finnish L2 Learners' Vocabulary. In Language Resources and Evaluation Conference (LREC 2022)."
},
"field_of_science": [
{
"id": "f7ee5ba6-f6af-48d0-bb0f-ea26851256d2",
"url": "http://www.yso.fi/onto/okm-tieteenala/ta6121",
"in_scheme": "http://www.yso.fi/onto/okm-tieteenala/conceptscheme",
"pref_label": {
"en": "Languages",
"fi": "Kielitieteet",
"sv": "Språkvetenskaper"
}
}
],
"infrastructure": [],
"keyword": [],
"language": [
{
"id": "b0c7eada-5b22-48b8-86c2-16e996ca2681",
"url": "http://lexvo.org/id/iso639-3/fin",
"in_scheme": "http://lexvo.org/id/",
"pref_label": {
"en": "Finnish",
"fi": "suomi",
"sv": "finska"
}
}
],
"metadata_owner": {
"id": "003e43bb-cfd5-43a5-92c8-1a6cbec7f76c",
"organization": "service_kielipankki"
},
"other_identifiers": [],
"persistent_identifier": "urn:nbn:fi:lb-2022041921",
"pid_generated_by_fairdata": false,
"projects": [],
"provenance": [],
"relation": [],
"remote_resources": [],
"spatial": [],
"state": "published",
"temporal": [],
"theme": [],
"title": {
"en": "TallVocabL2Fi: Measurements of 15 L2 Finnish learners' vocabularies",
"fi": "TallVocabL2Fi: Mitat 15 S2-opiskelijan sanavarastosta"
},
"created": "2023-04-05T00:00:00Z",
"modified": "2024-06-19T08:23:20Z",
"dataset_versions": [
{
"id": "212653f4-5a1a-4443-bbe5-7c48c7db799c",
"title": {
"en": "TallVocabL2Fi: Measurements of 15 L2 Finnish learners' vocabularies",
"fi": "TallVocabL2Fi: Mitat 15 S2-opiskelijan sanavarastosta"
},
"persistent_identifier": "urn:nbn:fi:lb-2022041921",
"state": "published",
"created": "2023-04-05T00:00:00Z",
"version": 1
}
],
"published_revision": 14,
"version": 1,
"api_version": 3,
"metadata_repository": "Fairdata"
},
{
"id": "df695ee8-c5f8-495a-b8a5-2658085d0894",
"access_rights": {
"id": "a473a449-f834-48b1-bf1e-89f290cf979f",
"license": [
{
"id": "fc8a7008-cf5e-4df1-9e05-0145d445c80e",
"url": "http://uri.suomi.fi/codelist/fairdata/license/code/ClarinRES-1.0",
"in_scheme": "http://uri.suomi.fi/codelist/fairdata/license",
"pref_label": {
"en": "CLARIN RES (Restricted) End-User License 1.0"
}
},
{
"id": "5a00a777-42a2-41a9-9911-968146f7e53f",
"url": "http://uri.suomi.fi/codelist/fairdata/license/code/other",
"in_scheme": "http://uri.suomi.fi/codelist/fairdata/license",
"pref_label": {
"en": "Other",
"fi": "Muu"
}
}
],
"access_type": {
"id": "6311561d-0b53-460b-9fb9-1852ce0761eb",
"url": "http://uri.suomi.fi/codelist/fairdata/access_type/code/restricted",
"in_scheme": "http://uri.suomi.fi/codelist/fairdata/access_type",
"pref_label": {
"en": "Restricted use",
"fi": "Saatavuutta rajoitettu"
}
},
"restriction_grounds": [
{
"id": "41b4ff93-e15e-4829-8553-49e4ef079d69",
"url": "http://uri.suomi.fi/codelist/fairdata/restriction_grounds/code/other",
"in_scheme": "http://uri.suomi.fi/codelist/fairdata/restriction_grounds",
"pref_label": {
"en": "Restricted access due to other reasons",
"fi": "Saatavuutta rajoitettu muulla perusteella",
"sv": "Begränsad åtkomst av övriga skäl"
}
}
]
},
"actors": [
{
"id": "f786e469-71ea-459c-9cdf-5eb4ee60d54b",
"roles": [
"creator",
"curator"
],
"person": {
"id": "775cb394-61fa-451f-8cf1-6d21b62cb9a9",
"name": "Anssi Moisio",
"email": "<hidden>"
},
"organization": {
"id": "2f9f9011-4061-4a93-96af-768471abada7",
"pref_label": {
"en": "Aalto University",
"fi": "Aalto-yliopisto",
"sv": "Aalto-universitetet",
"und": "Aalto-yliopisto"
},
"url": "http://uri.suomi.fi/codelist/fairdata/organization/code/10076",
"in_scheme": "http://uri.suomi.fi/codelist/fairdata/organization"
}
},
{
"id": "52ea557b-003a-4027-88ee-ec72d279e961",
"roles": [
"publisher"
],
"organization": {
"id": "a5f4935c-4c0e-44f6-a7d6-71b946913649",
"pref_label": {
"en": "University of Helsinki",
"fi": "Helsingin yliopisto",
"sv": "Helsingfors universitet",
"und": "Helsingin yliopisto"
},
"url": "http://uri.suomi.fi/codelist/fairdata/organization/code/01901",
"in_scheme": "http://uri.suomi.fi/codelist/fairdata/organization"
}
}
],
"cumulative_state": 0,
"data_catalog": "urn:nbn:fi:att:data-catalog-harvest-kielipankki",
"description": {
"en": "This resource is available for download in Kielipankki - The Language Bank of Finland as part of \"Donate Speech: Selected dataset\", http://urn.fi/urn:nbn:fi:lb-2022060127.\n\nThe resource contains a subset of 100 hours of transcribed speech that was selected from the Donate Speech Corpus and used for training an ASR system at Aalto University.\n\nThe training data includes speech from 1129 different speakers (according to the metadata accompanying the original recordings). Note that the training dataset has just over 20% of male speakers, whereas the puhelahjat-test and puhelahjat-dev sets contain 40% of male speakers.\n\nFor speech technology development purposes, the training dataset can be used together with the puhelahjat-test and puhelahjat-dev datasets. There is no overlap of speakers between these three sets."
},
"field_of_science": [
{
"id": "f7ee5ba6-f6af-48d0-bb0f-ea26851256d2",
"url": "http://www.yso.fi/onto/okm-tieteenala/ta6121",
"in_scheme": "http://www.yso.fi/onto/okm-tieteenala/conceptscheme",
"pref_label": {
"en": "Languages",
"fi": "Kielitieteet",
"sv": "Språkvetenskaper"
}
}
],
"infrastructure": [],
"keyword": [],
"language": [
{
"id": "b0c7eada-5b22-48b8-86c2-16e996ca2681",
"url": "http://lexvo.org/id/iso639-3/fin",
"in_scheme": "http://lexvo.org/id/",
"pref_label": {
"en": "Finnish",
"fi": "suomi",
"sv": "finska"
}
}
],
"metadata_owner": {
"id": "003e43bb-cfd5-43a5-92c8-1a6cbec7f76c",
"organization": "service_kielipankki"
},
"other_identifiers": [],
"persistent_identifier": "urn:nbn:fi:lb-2022060123",
"pid_generated_by_fairdata": false,
"projects": [],
"provenance": [],
"relation": [],
"remote_resources": [],
"spatial": [],
"state": "published",
"temporal": [],
"theme": [],
"title": {
"en": "Donate Speech Corpus: Training data (100h)",
"fi": "Lahjoita puhetta -aineisto: Opetusdata (100h)"
},
"created": "2023-04-21T00:00:00Z",
"modified": "2024-06-19T08:21:24Z",
"dataset_versions": [
{
"id": "df695ee8-c5f8-495a-b8a5-2658085d0894",
"title": {
"en": "Donate Speech Corpus: Training data (100h)",
"fi": "Lahjoita puhetta -aineisto: Opetusdata (100h)"
},
"persistent_identifier": "urn:nbn:fi:lb-2022060123",
"state": "published",
"created": "2023-04-21T00:00:00Z",
"version": 1
}
],
"published_revision": 14,
"version": 1,
"api_version": 3,
"metadata_repository": "Fairdata"
},
{
"id": "24da85be-f979-464f-b5ee-6a1755d18fea",
"access_rights": {
"id": "9c449a2d-da8c-41d7-961d-043b370c9ce9",
"license": [
{
"id": "fc8a7008-cf5e-4df1-9e05-0145d445c80e",
"url": "http://uri.suomi.fi/codelist/fairdata/license/code/ClarinRES-1.0",
"in_scheme": "http://uri.suomi.fi/codelist/fairdata/license",
"pref_label": {
"en": "CLARIN RES (Restricted) End-User License 1.0"
}
},
{
"id": "5a00a777-42a2-41a9-9911-968146f7e53f",
"url": "http://uri.suomi.fi/codelist/fairdata/license/code/other",
"in_scheme": "http://uri.suomi.fi/codelist/fairdata/license",
"pref_label": {
"en": "Other",
"fi": "Muu"
}
}
],
"access_type": {
"id": "6311561d-0b53-460b-9fb9-1852ce0761eb",
"url": "http://uri.suomi.fi/codelist/fairdata/access_type/code/restricted",
"in_scheme": "http://uri.suomi.fi/codelist/fairdata/access_type",
"pref_label": {
"en": "Restricted use",
"fi": "Saatavuutta rajoitettu"
}
},
"restriction_grounds": [
{
"id": "41b4ff93-e15e-4829-8553-49e4ef079d69",
"url": "http://uri.suomi.fi/codelist/fairdata/restriction_grounds/code/other",
"in_scheme": "http://uri.suomi.fi/codelist/fairdata/restriction_grounds",
"pref_label": {
"en": "Restricted access due to other reasons",
"fi": "Saatavuutta rajoitettu muulla perusteella",
"sv": "Begränsad åtkomst av övriga skäl"
}
}
]
},
"actors": [
{
"id": "a4cc9f3d-158b-4f55-ac84-477f404baf44",
"roles": [
"creator",
"curator"
],
"person": {
"id": "e2a3c815-96f0-4df5-b6cd-f8ad417a4d23",
"name": "Anssi Moisio",
"email": "<hidden>"
},
"organization": {
"id": "2f9f9011-4061-4a93-96af-768471abada7",
"pref_label": {
"en": "Aalto University",
"fi": "Aalto-yliopisto",
"sv": "Aalto-universitetet",
"und": "Aalto-yliopisto"
},
"url": "http://uri.suomi.fi/codelist/fairdata/organization/code/10076",
"in_scheme": "http://uri.suomi.fi/codelist/fairdata/organization"
}
},
{
"id": "720a2c4e-cdfd-4dc4-af1b-f7eb32709b3e",
"roles": [
"publisher"
],
"organization": {
"id": "a5f4935c-4c0e-44f6-a7d6-71b946913649",
"pref_label": {
"en": "University of Helsinki",
"fi": "Helsingin yliopisto",
"sv": "Helsingfors universitet",
"und": "Helsingin yliopisto"
},
"url": "http://uri.suomi.fi/codelist/fairdata/organization/code/01901",
"in_scheme": "http://uri.suomi.fi/codelist/fairdata/organization"
}
}
],
"cumulative_state": 0,
"data_catalog": "urn:nbn:fi:att:data-catalog-harvest-kielipankki",
"description": {
"en": "This resource is available for download in Kielipankki - The Language Bank of Finland as part of \"Donate Speech: Selected dataset\", http://urn.fi/urn:nbn:fi:lb-2022060127.\n\nThe resource contains a 1-hour subset of speech from the Donate Speech Corpus. In this set, each recording was transcribed by four different transcribers. The multi-transcriber data was used for testing an ASR system at Aalto University.\n\nThe set contains speech from 57 different speakers (according to the metadata accompanying the original recordings).\n\nFor speech technology development purposes, this multi-transcriber test dataset can be used together with the puhelahjat-test-mtrs set that was extended by adding all recordings by the same speakers to the current puhelahjat-test-mtr dataset."
},
"field_of_science": [
{
"id": "f7ee5ba6-f6af-48d0-bb0f-ea26851256d2",
"url": "http://www.yso.fi/onto/okm-tieteenala/ta6121",
"in_scheme": "http://www.yso.fi/onto/okm-tieteenala/conceptscheme",
"pref_label": {
"en": "Languages",
"fi": "Kielitieteet",
"sv": "Språkvetenskaper"
}
}
],
"infrastructure": [],
"keyword": [],
"language": [
{
"id": "b0c7eada-5b22-48b8-86c2-16e996ca2681",
"url": "http://lexvo.org/id/iso639-3/fin",
"in_scheme": "http://lexvo.org/id/",
"pref_label": {
"en": "Finnish",
"fi": "suomi",
"sv": "finska"
}
}
],
"metadata_owner": {
"id": "003e43bb-cfd5-43a5-92c8-1a6cbec7f76c",
"organization": "service_kielipankki"
},
"other_identifiers": [],
"persistent_identifier": "urn:nbn:fi:lb-2022060124",
"pid_generated_by_fairdata": false,
"projects": [],
"provenance": [],
"relation": [],
"remote_resources": [],
"spatial": [],
"state": "published",
"temporal": [],
"theme": [],
"title": {
"en": "Donate Speech Corpus: Multi-transcriber test data (1h)",
"fi": "Lahjoita puhetta -aineisto: Usean litteroijan testidata (1h)"
},
"created": "2023-04-21T00:00:00Z",
"modified": "2024-06-19T08:21:19Z",
"dataset_versions": [
{
"id": "24da85be-f979-464f-b5ee-6a1755d18fea",
"title": {
"en": "Donate Speech Corpus: Multi-transcriber test data (1h)",
"fi": "Lahjoita puhetta -aineisto: Usean litteroijan testidata (1h)"
},
"persistent_identifier": "urn:nbn:fi:lb-2022060124",
"state": "published",
"created": "2023-04-21T00:00:00Z",
"version": 1
}
],
"published_revision": 14,
"version": 1,
"api_version": 3,
"metadata_repository": "Fairdata"
},
{
"id": "99a59be7-6780-4144-a33f-2a6537bd05b8",
"access_rights": {
"id": "c1a448a1-de7b-4ba0-aa53-ffc271c2eda6",
"license": [
{
"id": "fc8a7008-cf5e-4df1-9e05-0145d445c80e",
"url": "http://uri.suomi.fi/codelist/fairdata/license/code/ClarinRES-1.0",
"in_scheme": "http://uri.suomi.fi/codelist/fairdata/license",
"pref_label": {
"en": "CLARIN RES (Restricted) End-User License 1.0"
}
},
{
"id": "5a00a777-42a2-41a9-9911-968146f7e53f",
"url": "http://uri.suomi.fi/codelist/fairdata/license/code/other",
"in_scheme": "http://uri.suomi.fi/codelist/fairdata/license",
"pref_label": {
"en": "Other",
"fi": "Muu"
}
}
],
"access_type": {
"id": "6311561d-0b53-460b-9fb9-1852ce0761eb",
"url": "http://uri.suomi.fi/codelist/fairdata/access_type/code/restricted",
"in_scheme": "http://uri.suomi.fi/codelist/fairdata/access_type",
"pref_label": {
"en": "Restricted use",
"fi": "Saatavuutta rajoitettu"
}
},
"restriction_grounds": [
{
"id": "41b4ff93-e15e-4829-8553-49e4ef079d69",
"url": "http://uri.suomi.fi/codelist/fairdata/restriction_grounds/code/other",
"in_scheme": "http://uri.suomi.fi/codelist/fairdata/restriction_grounds",
"pref_label": {
"en": "Restricted access due to other reasons",
"fi": "Saatavuutta rajoitettu muulla perusteella",
"sv": "Begränsad åtkomst av övriga skäl"
}
}
]
},
"actors": [
{
"id": "05181b83-d8fb-4749-bcd9-41b86da4e65f",
"roles": [
"creator",
"curator"
],
"person": {
"id": "cb8a4107-f503-48da-ad85-9390ee770571",
"name": "Anssi Moisio",
"email": "<hidden>"
},
"organization": {
"id": "2f9f9011-4061-4a93-96af-768471abada7",
"pref_label": {
"en": "Aalto University",
"fi": "Aalto-yliopisto",
"sv": "Aalto-universitetet",
"und": "Aalto-yliopisto"
},
"url": "http://uri.suomi.fi/codelist/fairdata/organization/code/10076",
"in_scheme": "http://uri.suomi.fi/codelist/fairdata/organization"
}
},
{
"id": "9be84a39-ce6f-4015-93b8-4be8b819f63b",
"roles": [
"publisher"
],
"organization": {
"id": "a5f4935c-4c0e-44f6-a7d6-71b946913649",
"pref_label": {
"en": "University of Helsinki",
"fi": "Helsingin yliopisto",
"sv": "Helsingfors universitet",
"und": "Helsingin yliopisto"
},
"url": "http://uri.suomi.fi/codelist/fairdata/organization/code/01901",
"in_scheme": "http://uri.suomi.fi/codelist/fairdata/organization"
}
}
],
"cumulative_state": 0,
"data_catalog": "urn:nbn:fi:att:data-catalog-harvest-kielipankki",
"description": {
"en": "This resource is available for download in Kielipankki - The Language Bank of Finland as part of \"Donate Speech: Selected dataset\", http://urn.fi/urn:nbn:fi:lb-2022060127.\n\nThe resource contains a subset of 10 hours of transcribed speech that was selected from the Donate Speech Corpus and used for developing an ASR system at Aalto University.\n\nThe development data includes at least ten minutes of speech for each metadata class in each of the five metadata domains (age, dialect, gender, native/non-native and theme). The set contains speech from 103 different speakers (according to the metadata accompanying the original recordings). The gender ratio has been debiased, so that the set includes over 40% male speakers (similarly to the puhelahjat-test set, while the puhelahjat-train set has just over 20% of male speakers). \n\nFor speech technology development purposes, the development dataset can be used together with the puhelahjat-test and puhelahjat-train datasets. There is no overlap of speakers between these three sets."
},
"field_of_science": [
{
"id": "f7ee5ba6-f6af-48d0-bb0f-ea26851256d2",
"url": "http://www.yso.fi/onto/okm-tieteenala/ta6121",
"in_scheme": "http://www.yso.fi/onto/okm-tieteenala/conceptscheme",
"pref_label": {
"en": "Languages",
"fi": "Kielitieteet",
"sv": "Språkvetenskaper"
}
}
],
"infrastructure": [],
"keyword": [],
"language": [
{
"id": "b0c7eada-5b22-48b8-86c2-16e996ca2681",
"url": "http://lexvo.org/id/iso639-3/fin",
"in_scheme": "http://lexvo.org/id/",
"pref_label": {
"en": "Finnish",
"fi": "suomi",
"sv": "finska"
}
}
],
"metadata_owner": {
"id": "003e43bb-cfd5-43a5-92c8-1a6cbec7f76c",
"organization": "service_kielipankki"
},
"other_identifiers": [],
"persistent_identifier": "urn:nbn:fi:lb-2022060121",
"pid_generated_by_fairdata": false,
"projects": [],
"provenance": [],
"relation": [],
"remote_resources": [],
"spatial": [],
"state": "published",
"temporal": [],
"theme": [],
"title": {
"en": "Donate Speech Corpus: Development data (10h)",
"fi": "Lahjoita puhetta -aineisto: Kehitysdata (10h)"
},
"created": "2023-04-21T00:00:00Z",
"modified": "2024-06-19T08:19:50Z",
"dataset_versions": [
{
"id": "99a59be7-6780-4144-a33f-2a6537bd05b8",
"title": {
"en": "Donate Speech Corpus: Development data (10h)",
"fi": "Lahjoita puhetta -aineisto: Kehitysdata (10h)"
},
"persistent_identifier": "urn:nbn:fi:lb-2022060121",
"state": "published",
"created": "2023-04-21T00:00:00Z",
"version": 1
}
],
"published_revision": 14,
"version": 1,
"api_version": 3,
"metadata_repository": "Fairdata"
},
{
"id": "a3837165-7d27-4f84-9a90-3348595ae786",
"access_rights": {
"id": "1938e07a-a117-426e-afab-5cf0cf7b76b1",
"license": [
{
"id": "fc8a7008-cf5e-4df1-9e05-0145d445c80e",
"url": "http://uri.suomi.fi/codelist/fairdata/license/code/ClarinRES-1.0",
"in_scheme": "http://uri.suomi.fi/codelist/fairdata/license",
"pref_label": {
"en": "CLARIN RES (Restricted) End-User License 1.0"
}
}
],
"access_type": {
"id": "6311561d-0b53-460b-9fb9-1852ce0761eb",
"url": "http://uri.suomi.fi/codelist/fairdata/access_type/code/restricted",
"in_scheme": "http://uri.suomi.fi/codelist/fairdata/access_type",
"pref_label": {
"en": "Restricted use",
"fi": "Saatavuutta rajoitettu"
}
},
"restriction_grounds": [
{
"id": "41b4ff93-e15e-4829-8553-49e4ef079d69",
"url": "http://uri.suomi.fi/codelist/fairdata/restriction_grounds/code/other",
"in_scheme": "http://uri.suomi.fi/codelist/fairdata/restriction_grounds",
"pref_label": {
"en": "Restricted access due to other reasons",
"fi": "Saatavuutta rajoitettu muulla perusteella",
"sv": "Begränsad åtkomst av övriga skäl"
}
}
]
},
"actors": [
{
"id": "e00b157d-185c-422b-bfea-f6e15716ace2",
"roles": [
"creator",
"curator",
"rights_holder"
],
"person": {
"id": "bc8673b5-92c7-4b11-865d-1d7d24eaae5c",
"name": "Mikko Ojanen",
"email": "<hidden>"
},
"organization": {
"id": "a5f4935c-4c0e-44f6-a7d6-71b946913649",
"pref_label": {
"en": "University of Helsinki",
"fi": "Helsingin yliopisto",
"sv": "Helsingfors universitet",
"und": "Helsingin yliopisto"
},
"url": "http://uri.suomi.fi/codelist/fairdata/organization/code/01901",
"in_scheme": "http://uri.suomi.fi/codelist/fairdata/organization"
}
},
{
"id": "51ab8bf8-711f-4b3a-b3ac-569b56d2c267",
"roles": [
"publisher",
"rights_holder"
],
"organization": {
"id": "a5f4935c-4c0e-44f6-a7d6-71b946913649",
"pref_label": {
"en": "University of Helsinki",
"fi": "Helsingin yliopisto",
"sv": "Helsingfors universitet",
"und": "Helsingin yliopisto"
},
"url": "http://uri.suomi.fi/codelist/fairdata/organization/code/01901",
"in_scheme": "http://uri.suomi.fi/codelist/fairdata/organization"
}
}
],
"cumulative_state": 0,
"data_catalog": "urn:nbn:fi:att:data-catalog-harvest-kielipankki",
"description": {
"fi": "FinEARS-aineisto sisältää suomalaisen elektroakustisen musiikin historiaan liittyviä teemahaastatteluja, joita on tehty vuodesta 2004 alkaen. Aineisto on suomenkielistä. Haastatteluissa käsitellään suomalaisen elektroakustisen musiikin historiaa 1960- ja 1970-luvuilla. Aineisto sisältää muistitietoa ja muistelupuhetta.\n\nAineisto on tulossa saataville Kielipankkiin."
},
"field_of_science": [
{
"id": "f7ee5ba6-f6af-48d0-bb0f-ea26851256d2",
"url": "http://www.yso.fi/onto/okm-tieteenala/ta6121",
"in_scheme": "http://www.yso.fi/onto/okm-tieteenala/conceptscheme",
"pref_label": {
"en": "Languages",
"fi": "Kielitieteet",
"sv": "Språkvetenskaper"
}
}
],
"infrastructure": [],
"keyword": [],
"language": [
{
"id": "b0c7eada-5b22-48b8-86c2-16e996ca2681",
"url": "http://lexvo.org/id/iso639-3/fin",
"in_scheme": "http://lexvo.org/id/",
"pref_label": {
"en": "Finnish",
"fi": "suomi",
"sv": "finska"
}
}
],
"metadata_owner": {
"id": "003e43bb-cfd5-43a5-92c8-1a6cbec7f76c",
"organization": "service_kielipankki"
},
"other_identifiers": [],
"persistent_identifier": "urn:nbn:fi:lb-2020030421",
"pid_generated_by_fairdata": false,
"projects": [],
"provenance": [],
"relation": [],
"remote_resources": [],
"spatial": [],
"state": "published",
"temporal": [],
"theme": [],
"title": {
"en": "Finnish electroacoustic music interviews",
"fi": "Suomalaisen elektroakustisen musiikin haastatteluaineisto"
},
"created": "2024-03-09T00:00:00Z",
"modified": "2024-06-19T08:19:02Z",
"dataset_versions": [
{
"id": "a3837165-7d27-4f84-9a90-3348595ae786",
"title": {
"en": "Finnish electroacoustic music interviews",
"fi": "Suomalaisen elektroakustisen musiikin haastatteluaineisto"
},
"persistent_identifier": "urn:nbn:fi:lb-2020030421",
"state": "published",
"created": "2024-03-09T00:00:00Z",
"version": 1
}
],
"published_revision": 14,
"version": 1,
"api_version": 3,
"metadata_repository": "Fairdata"
}
]
}