Profile Examples
This section provides examples of dataset profiles generated by the Dataset Profiler service for different types of datasets.
The profiles are in JSON-LD format and extend the Croissant Metadata Schema.
Note
As the Dataset Profiler service is under active development, the examples provided here are illustrative and may not represent the full capabilities of the service. Future versions may include additional features and improvements.
Example 1: Single Tabular Dataset (CSV)
Dataset Page: TODO WHEN AVAILABLE
Show profile
{
"@context": {
"@language": "en",
"@vocab": "https://schema.org/",
"cr": "http://mlcommons.org/croissant/",
"rai": "http://mlcommons.org/croissant/RAI/",
"dg": "http://datagems.eu/TBD",
"data": {
"@id": "cr:data",
"@type": "@json"
},
"dataType": {
"@id": "cr:dataType",
"@type": "@vocab"
},
"examples": {
"@id": "cr:examples",
"@type": "@json"
},
"dct": "http://purl.org/dc/terms/",
"conformsTo": "dct:conformsTo",
"citeAs": "cr:citeAs",
"column": "cr:column",
"extract": "cr:extract",
"field": "cr:field",
"fileProperty": "cr:fileProperty",
"fileObject": "cr:fileObject",
"fileSet": "cr:fileSet",
"format": "cr:format",
"includes": "cr:includes",
"isLiveDataset": "cr:isLiveDataset",
"jsonPath": "cr:jsonPath",
"key": "cr:key",
"md5": "cr:md5",
"parentField": "cr:parentField",
"path": "cr:path",
"recordSet": "cr:recordSet",
"references": "cr:references",
"regex": "cr:regex",
"repeated": "cr:repeated",
"replace": "cr:replace",
"sc": "https://schema.org/",
"separator": "cr:separator",
"source": "cr:source",
"subField": "cr:subField",
"transform": "cr:transform",
"access": "dg:access",
"uploadedBy": "dg:uploadedBy",
"wd": "https://www.wikidata.org/wiki/"
},
"@type": "sc:Dataset",
"@id": "8930240b-a0e8-46e7-ace8-aab2b42fcc01",
"name": "Mathematics Learning Assessment",
"description": "This dataset was extracted from the MathE platform, an online educational platform developed to support mathematics teaching and learning in higher education. It contains 546 student responses to questions on several mathematical topics. Each record corresponds to an individual answer and includes the following features: Student ID, Student Country, Question ID, Type of Answer (correct or incorrect), Question Level (basic or advanced based on the assessment of the contributing professor), Math Topic (broader mathematical area of the question), Math Subtopic, and Question Keywords. The data spans from February 2019 to December 2023.",
"conformsTo": "",
"citeAs": "",
"license": "CC0 1.0",
"url": "https://dados.ipb.pt/dataset.xhtml?persistentId=doi:10.34620/dadosipb/PW3OWY",
"doi": "",
"version": "",
"headline": "Dataset for Assessing Mathematics Learning in Higher Education.",
"keywords": [
"math",
"student",
"higher education"
],
"fieldOfScience": [
"MATHEMATICS"
],
"inLanguage": [
"en"
],
"country": "PT",
"datePublished": "2025-05-24",
"access": "PUBLIC",
"uploadedBy": "ADMIN",
"distribution": [
{
"@type": "cr:FileObject",
"@id": "765bc5d1-661b-462e-82b2-3f2fc76d9883",
"name": "mathe_assessment_dataset.csv",
"description": "",
"contentSize": "1057461 B",
"contentUrl": "s3://datagems/dataset_id/mathe_assessment_dataset.csv",
"encodingFormat": "text/csv",
"sha256": "1cadacd304e2a9367a4f8effdfa0a7d70bee723d6f4b08b70c58d75e9aae6440"
}
],
"recordSet": [
{
"@type": "cr:RecordSet",
"@id": "fc9de940-ab78-4c3e-a087-cb2b194bf2af",
"name": "mathe_assessment_dataset",
"description": "",
"field": [
{
"@type": "cr:Field",
"@id": "e71b6165-22b9-4982-bde0-4cf2387d9a4b",
"name": "Student ID",
"description": "",
"dataType": "sc:Integer",
"source": {
"fileObject": {
"@id": "765bc5d1-661b-462e-82b2-3f2fc76d9883"
},
"extract": {
"column": "Student ID"
}
},
"sample": [
1179,
955,
1026
]
},
{
"@type": "cr:Field",
"@id": "1eb027c3-594f-4d89-befe-07e3aa143253",
"name": "Student Country",
"description": "",
"dataType": "sc:Text",
"source": {
"fileObject": {
"@id": "765bc5d1-661b-462e-82b2-3f2fc76d9883"
},
"extract": {
"column": "Student Country"
}
},
"sample": [
"Lithuania",
"Italy",
"Italy"
]
},
{
"@type": "cr:Field",
"@id": "e9f584a7-36db-416a-8bed-0476348f944c",
"name": "Question ID",
"description": "",
"dataType": "sc:Integer",
"source": {
"fileObject": {
"@id": "765bc5d1-661b-462e-82b2-3f2fc76d9883"
},
"extract": {
"column": "Question ID"
}
},
"sample": [
429,
788,
431
]
},
{
"@type": "cr:Field",
"@id": "6368937e-5651-48aa-909d-ad9b281b45d7",
"name": "Type of Answer",
"description": "",
"dataType": "sc:Integer",
"source": {
"fileObject": {
"@id": "765bc5d1-661b-462e-82b2-3f2fc76d9883"
},
"extract": {
"column": "Type of Answer"
}
},
"sample": [
0,
1,
0
]
},
{
"@type": "cr:Field",
"@id": "7b80103b-1534-4b52-8703-2525ff14833f",
"name": "Question Level",
"description": "",
"dataType": "sc:Text",
"source": {
"fileObject": {
"@id": "765bc5d1-661b-462e-82b2-3f2fc76d9883"
},
"extract": {
"column": "Question Level"
}
},
"sample": [
"Basic",
"Advanced",
"Basic"
]
},
{
"@type": "cr:Field",
"@id": "479ff0d5-8052-4fe4-a889-88284bb3878a",
"name": "Topic",
"description": "",
"dataType": "sc:Text",
"source": {
"fileObject": {
"@id": "765bc5d1-661b-462e-82b2-3f2fc76d9883"
},
"extract": {
"column": "Topic"
}
},
"sample": [
"Integration",
"Statistics",
"Linear Algebra"
]
},
{
"@type": "cr:Field",
"@id": "b3fa9433-a5cd-4023-86e7-fadebd9f74c0",
"name": "Subtopic",
"description": "",
"dataType": "sc:Text",
"source": {
"fileObject": {
"@id": "765bc5d1-661b-462e-82b2-3f2fc76d9883"
},
"extract": {
"column": "Subtopic"
}
},
"sample": [
"Nonlinear Optimization",
"Statistics",
"Linear Systems"
]
},
{
"@type": "cr:Field",
"@id": "ca2c3445-b9c0-4d44-a2d0-afa6dc05e079",
"name": "Keywords",
"description": "",
"dataType": "sc:Text",
"source": {
"fileObject": {
"@id": "765bc5d1-661b-462e-82b2-3f2fc76d9883"
},
"extract": {
"column": "Keywords"
}
},
"sample": [
"Injective linear application,Invertible linear operator,Isomorphism",
"Separable variables equation",
"Subspace,Span,Linear combination"
]
}
]
}
]
}
Example 2: Multiple CSV Files
Dataset Page: TODO WHEN AVAILABLE
Show profile
{
"@context": {
"@language": "en",
"@vocab": "https://schema.org/",
"cr": "http://mlcommons.org/croissant/",
"rai": "http://mlcommons.org/croissant/RAI/",
"dg": "http://datagems.eu/TBD",
"data": {
"@id": "cr:data",
"@type": "@json"
},
"dataType": {
"@id": "cr:dataType",
"@type": "@vocab"
},
"examples": {
"@id": "cr:examples",
"@type": "@json"
},
"dct": "http://purl.org/dc/terms/",
"conformsTo": "dct:conformsTo",
"citeAs": "cr:citeAs",
"column": "cr:column",
"extract": "cr:extract",
"field": "cr:field",
"fileProperty": "cr:fileProperty",
"fileObject": "cr:fileObject",
"fileSet": "cr:fileSet",
"format": "cr:format",
"includes": "cr:includes",
"isLiveDataset": "cr:isLiveDataset",
"jsonPath": "cr:jsonPath",
"key": "cr:key",
"md5": "cr:md5",
"parentField": "cr:parentField",
"path": "cr:path",
"recordSet": "cr:recordSet",
"references": "cr:references",
"regex": "cr:regex",
"repeated": "cr:repeated",
"replace": "cr:replace",
"sc": "https://schema.org/",
"separator": "cr:separator",
"source": "cr:source",
"subField": "cr:subField",
"transform": "cr:transform",
"access": "dg:access",
"uploadedBy": "dg:uploadedBy",
"wd": "https://www.wikidata.org/wiki/"
},
"@type": "sc:Dataset",
"@id": "729f8932-8300-4ac5-a71e-b41904ca7433",
"name": "Meteorological data time series (Daily - UTC)",
"description": "The dataset includes daily meteorological parameters for the period 2010-2023 from 54 meteorological stations. You can see visualizations of the data from these stations on the page https://www.meteo.gr/climate/ The parameters are temperature (\u00b0C), relative humidity (%), pressure (hPa), rainfall (mm), wind speed (km/h), dominant wind direction, and wind gust (km/h). The measurements come from the network of automatic meteorological stations of the National Observatory of Athens/meteo.gr. Information about the stations and any malfunctions are provided at http://meteosearch.meteo.gr/ on the respective station's page. Reference to the network is made through the scientific publication https://rmets.onlinelibrary.wiley.com/doi/full/10.1002/gdj3.44",
"conformsTo": "",
"citeAs": "",
"license": "CC-BY-SA 4.0",
"url": "https://data.climpact.gr/dataset/497dc26d-45e0-4ad5-b8f3-5f8890f65129",
"doi": "",
"version": "",
"headline": "Daily meteorological parameters for the period 2010-2023 from 54 meteorological stations in Greece.",
"keywords": [
"weather",
"greece",
"weather prediction"
],
"fieldOfScience": [
"EARTH AND RELATED ENVIRONMENTAL SCIENCES"
],
"inLanguage": [
"el"
],
"country": "GR",
"datePublished": "2025-05-24",
"access": "PUBLIC",
"uploadedBy": "ADMIN",
"distribution": [
{
"@type": "cr:FileObject",
"@id": "19532c0b-6218-4f86-93e9-69b5812ce7f7",
"name": "cities.csv",
"description": "",
"contentSize": "22582102 B",
"contentUrl": "s3://datagems/dataset_id/cities.csv",
"encodingFormat": "text/csv",
"sha256": "59ed113686e1f21a1a87e75e0909af71a71ef05472b334fa7beccc2100ec5245"
},
{
"@type": "cr:FileObject",
"@id": "eace3f55-5742-4e3e-85c9-2ea47ae1547d",
"name": "stations_list.csv",
"description": "",
"contentSize": "1917 B",
"contentUrl": "s3://datagems/dataset_id/stations_list.csv",
"encodingFormat": "text/csv",
"sha256": "04fd252b6e957e88175f4ab4a4c7799e81e1508f22628c44fa13c8034eae508e"
}
],
"recordSet": [
{
"@type": "cr:RecordSet",
"@id": "9aabef19-b704-4e57-812a-97cd12ccf792",
"name": "cities",
"description": "",
"field": [
{
"@type": "cr:Field",
"@id": "25fec8bd-b5c8-4ecb-801b-355ce419cf8f",
"name": "Date",
"description": "",
"dataType": "sc:Date",
"source": {
"fileObject": {
"@id": "19532c0b-6218-4f86-93e9-69b5812ce7f7"
},
"extract": {
"column": "Date"
}
},
"sample": [
"2013-05-26",
"2021-04-14",
"2021-09-19"
]
},
{
"@type": "cr:Field",
"@id": "e6336fa3-f4b3-4495-b84b-1ea4d45bc5ec",
"name": "T_mean",
"description": "",
"dataType": "sc:Float",
"source": {
"fileObject": {
"@id": "19532c0b-6218-4f86-93e9-69b5812ce7f7"
},
"extract": {
"column": "T_mean"
}
},
"sample": [
"17.9",
"13.5",
"20.0"
]
},
{
"@type": "cr:Field",
"@id": "8dcb82d8-2212-4958-ab9c-620b4840fd0c",
"name": "T_max",
"description": "",
"dataType": "sc:Float",
"source": {
"fileObject": {
"@id": "19532c0b-6218-4f86-93e9-69b5812ce7f7"
},
"extract": {
"column": "T_max"
}
},
"sample": [
"30.7",
"22.0",
"17.2"
]
},
{
"@type": "cr:Field",
"@id": "33433ee4-7a97-40d2-bd34-afc633f315aa",
"name": "T_min",
"description": "",
"dataType": "sc:Float",
"source": {
"fileObject": {
"@id": "19532c0b-6218-4f86-93e9-69b5812ce7f7"
},
"extract": {
"column": "T_min"
}
},
"sample": [
"12.8",
"25.7",
"7.5"
]
},
{
"@type": "cr:Field",
"@id": "72b11e3f-fa48-4fc0-b232-f37cdcea745c",
"name": "RH_mean",
"description": "",
"dataType": "sc:Float",
"source": {
"fileObject": {
"@id": "19532c0b-6218-4f86-93e9-69b5812ce7f7"
},
"extract": {
"column": "RH_mean"
}
},
"sample": [
"65.1",
"64.8",
"84.2"
]
},
{
"@type": "cr:Field",
"@id": "c94f212d-9a06-41bd-834f-e30388dd4b3c",
"name": "RH_max",
"description": "",
"dataType": "sc:Integer",
"source": {
"fileObject": {
"@id": "19532c0b-6218-4f86-93e9-69b5812ce7f7"
},
"extract": {
"column": "RH_max"
}
},
"sample": [
"76",
"97",
"83"
]
},
{
"@type": "cr:Field",
"@id": "830176c9-42d1-497f-afcc-0e7dcf609415",
"name": "RH_min",
"description": "",
"dataType": "sc:Integer",
"source": {
"fileObject": {
"@id": "19532c0b-6218-4f86-93e9-69b5812ce7f7"
},
"extract": {
"column": "RH_min"
}
},
"sample": [
"49",
"71",
"26"
]
},
{
"@type": "cr:Field",
"@id": "45570e8e-a714-4e51-b323-2ac0277557a7",
"name": "Prs_mean",
"description": "",
"dataType": "sc:Float",
"source": {
"fileObject": {
"@id": "19532c0b-6218-4f86-93e9-69b5812ce7f7"
},
"extract": {
"column": "Prs_mean"
}
},
"sample": [
"1008.5",
"1033.2",
"1022.7"
]
},
{
"@type": "cr:Field",
"@id": "5a2859bf-1664-4f7e-abcc-b6949f72aa3b",
"name": "Prs_max",
"description": "",
"dataType": "sc:Float",
"source": {
"fileObject": {
"@id": "19532c0b-6218-4f86-93e9-69b5812ce7f7"
},
"extract": {
"column": "Prs_max"
}
},
"sample": [
"1010.2",
"1017.7",
"1017.2"
]
},
{
"@type": "cr:Field",
"@id": "cd3ad217-8199-411e-b011-e4d447aaad5b",
"name": "Prs_min",
"description": "",
"dataType": "sc:Float",
"source": {
"fileObject": {
"@id": "19532c0b-6218-4f86-93e9-69b5812ce7f7"
},
"extract": {
"column": "Prs_min"
}
},
"sample": [
"---",
"1013.8",
"1003.9"
]
},
{
"@type": "cr:Field",
"@id": "7cbd3887-b9fe-4312-a7a6-23df5bde6ba7",
"name": "Ac_R",
"description": "",
"dataType": "sc:Float",
"source": {
"fileObject": {
"@id": "19532c0b-6218-4f86-93e9-69b5812ce7f7"
},
"extract": {
"column": "Ac_R"
}
},
"sample": [
"0.0",
"0.0",
"0.0"
]
},
{
"@type": "cr:Field",
"@id": "2ca2f524-27ef-4aeb-a15a-ace7e9325f92",
"name": "WS_mean",
"description": "",
"dataType": "sc:Float",
"source": {
"fileObject": {
"@id": "19532c0b-6218-4f86-93e9-69b5812ce7f7"
},
"extract": {
"column": "WS_mean"
}
},
"sample": [
"3.8",
"3.7",
"4.2"
]
},
{
"@type": "cr:Field",
"@id": "40077a17-d92f-4338-a01b-4a1e01f5e787",
"name": "DWD",
"description": "",
"dataType": "sc:Text",
"source": {
"fileObject": {
"@id": "19532c0b-6218-4f86-93e9-69b5812ce7f7"
},
"extract": {
"column": "DWD"
}
},
"sample": [
"SSE",
"NW",
"ENE"
]
},
{
"@type": "cr:Field",
"@id": "7df62090-6456-4a7c-b612-8f304ba8a104",
"name": "WG",
"description": "",
"dataType": "sc:Float",
"source": {
"fileObject": {
"@id": "19532c0b-6218-4f86-93e9-69b5812ce7f7"
},
"extract": {
"column": "WG"
}
},
"sample": [
"14.5",
"20.9",
"35.4"
]
},
{
"@type": "cr:Field",
"@id": "63a3e364-03f5-43c5-b305-67a6e83f7622",
"name": "city",
"description": "",
"dataType": "sc:Text",
"source": {
"fileObject": {
"@id": "19532c0b-6218-4f86-93e9-69b5812ce7f7"
},
"extract": {
"column": "city"
}
},
"sample": [
"ithaki",
"veroia",
"argos"
]
}
]
},
{
"@type": "cr:RecordSet",
"@id": "98ae4cae-0503-4159-961d-3b6c079da352",
"name": "stations_list",
"description": "",
"field": [
{
"@type": "cr:Field",
"@id": "2cb871be-d08e-496c-8762-6cf9356e97e6",
"name": "stations",
"description": "",
"dataType": "sc:Text",
"source": {
"fileObject": {
"@id": "eace3f55-5742-4e3e-85c9-2ea47ae1547d"
},
"extract": {
"column": "stations"
}
},
"sample": [
"tripoli",
"samothraki",
"samos"
]
},
{
"@type": "cr:Field",
"@id": "c2176701-e679-473a-a533-af99e40389a3",
"name": "latitude",
"description": "",
"dataType": "sc:Float",
"source": {
"fileObject": {
"@id": "eace3f55-5742-4e3e-85c9-2ea47ae1547d"
},
"extract": {
"column": "latitude"
}
},
"sample": [
39.627532,
40.794947,
37.795072
]
},
{
"@type": "cr:Field",
"@id": "ceb9d821-d850-4daa-90db-241358cc3b10",
"name": "longitude",
"description": "",
"dataType": "sc:Float",
"source": {
"fileObject": {
"@id": "eace3f55-5742-4e3e-85c9-2ea47ae1547d"
},
"extract": {
"column": "longitude"
}
},
"sample": [
28.086268,
25.106667,
21.445693
]
},
{
"@type": "cr:Field",
"@id": "d9513bee-ebe7-4cd5-8447-8b38bef1efd9",
"name": "altitude",
"description": "",
"dataType": "sc:Float",
"source": {
"fileObject": {
"@id": "eace3f55-5742-4e3e-85c9-2ea47ae1547d"
},
"extract": {
"column": "altitude"
}
},
"sample": [
650.0,
60.0,
120.0
]
}
]
}
]
}
Example 3: Multiple File Types
Dataset Page: TODO WHEN AVAILABLE
Show profile
{
"@context": {
"@language": "en",
"@vocab": "https://schema.org/",
"cr": "http://mlcommons.org/croissant/",
"rai": "http://mlcommons.org/croissant/RAI/",
"dg": "http://datagems.eu/TBD",
"data": {
"@id": "cr:data",
"@type": "@json"
},
"dataType": {
"@id": "cr:dataType",
"@type": "@vocab"
},
"examples": {
"@id": "cr:examples",
"@type": "@json"
},
"dct": "http://purl.org/dc/terms/",
"conformsTo": "dct:conformsTo",
"citeAs": "cr:citeAs",
"column": "cr:column",
"extract": "cr:extract",
"field": "cr:field",
"fileProperty": "cr:fileProperty",
"fileObject": "cr:fileObject",
"fileSet": "cr:fileSet",
"format": "cr:format",
"includes": "cr:includes",
"isLiveDataset": "cr:isLiveDataset",
"jsonPath": "cr:jsonPath",
"key": "cr:key",
"md5": "cr:md5",
"parentField": "cr:parentField",
"path": "cr:path",
"recordSet": "cr:recordSet",
"references": "cr:references",
"regex": "cr:regex",
"repeated": "cr:repeated",
"replace": "cr:replace",
"sc": "https://schema.org/",
"separator": "cr:separator",
"source": "cr:source",
"subField": "cr:subField",
"transform": "cr:transform",
"access": "dg:access",
"uploadedBy": "dg:uploadedBy",
"wd": "https://www.wikidata.org/wiki/"
},
"@type": "sc:Dataset",
"@id": "a36dde9d-7f95-40ff-8a9b-f687d0a72318",
"name": "Integration Techniques Questions MathE Database",
"description": "MathE is a mathematical platform developed under the MathE project (mathe.ipb.pt) for higher education. The dataset associated with the Integration topic and the Integration techniques subtopic has 61 questions, the correct answer and two incorrect answers with common errors made by students, keywords associated and material association. The database is organised as follows: Question ID, topic name; subtopic name; question; Correct answer, Incorrect answer 1, incorrect answer 2, keywords, Videos, Material.",
"conformsTo": "",
"citeAs": "",
"license": "CC0 1.0",
"url": "https://doi.org/10.34620/dadosipb/3TUVJX",
"doi": "",
"version": "",
"headline": "Integration Techniques Questions MathE Database.",
"keywords": [
"math",
"student",
"higher education"
],
"fieldOfScience": [
"MATHEMATICS"
],
"inLanguage": [
"en"
],
"country": "PT",
"datePublished": "2025-05-24",
"access": "PUBLIC",
"uploadedBy": "ADMIN",
"distribution": [
{
"@type": "cr:FileSet",
"@id": "b5d865d1-8c7b-4e6d-a545-b17c5e881363",
"name": "PDF",
"description": "",
"contentSize": "1414800 B",
"contentUrl": "s3://datagems/dataset_id/",
"encodingFormat": "application/pdf",
"includes": "PDF/*"
},
{
"@type": "cr:FileSet",
"@id": "ca2073a1-cef2-4cb6-ab8e-7327042a080c",
"name": "txt",
"description": "",
"contentSize": "2354 B",
"contentUrl": "s3://datagems/dataset_id/",
"encodingFormat": "text/plain",
"includes": "txt/*"
},
{
"@type": "cr:FileObject",
"@id": "83c1e803-ebf6-481a-bb92-4bcea5bfcd3f",
"name": "Keywords.csv",
"description": "",
"contentSize": "243 B",
"contentUrl": "s3://datagems/dataset_id/Keywords.csv",
"encodingFormat": "text/csv",
"sha256": "0cd9cf630e410b6740ee9234a30472270b33c7e3a23b7c164dd40fc19b1fa659"
},
{
"@type": "cr:FileObject",
"@id": "b45656dd-9319-4bd6-913a-521208074bf9",
"name": "IntegrationTechniques.csv",
"description": "",
"contentSize": "26051 B",
"contentUrl": "s3://datagems/dataset_id/IntegrationTechniques.csv",
"encodingFormat": "text/csv",
"sha256": "eea1e6da7f7ab9169b5afdf5e360403aba6f987dd13eefc735edfc315d59df59"
},
{
"@type": "cr:FileObject",
"@id": "20d9223f-89f2-46a9-98f5-82abedb8aff4",
"name": "Videos.csv",
"description": "",
"contentSize": "262 B",
"contentUrl": "s3://datagems/dataset_id/Videos.csv",
"encodingFormat": "text/csv",
"sha256": "2b638eeacbef36ef3908a61d86e9d075dd2ee56d20b9ff64db385cca254810e0"
}
],
"recordSet": [
{
"@type": "cr:RecordSet",
"@id": "2b91519b-8a07-4831-9193-c15d5a69a0df",
"name": "Keywords",
"description": "",
"field": [
{
"@type": "cr:Field",
"@id": "18a4701e-14cd-47d3-bfe4-61a441bbf0de",
"name": "ID",
"description": "",
"dataType": "sc:Integer",
"source": {
"fileObject": {
"@id": "83c1e803-ebf6-481a-bb92-4bcea5bfcd3f"
},
"extract": {
"column": "ID"
}
},
"sample": [
438,
102,
106
]
},
{
"@type": "cr:Field",
"@id": "37fd0ca7-a42e-4542-9c2e-1d46db6b8463",
"name": "name",
"description": "",
"dataType": "sc:Text",
"source": {
"fileObject": {
"@id": "83c1e803-ebf6-481a-bb92-4bcea5bfcd3f"
},
"extract": {
"column": "name"
}
},
"sample": [
"Integration by parts",
"Substitution",
"Rational functions"
]
}
]
},
{
"@type": "cr:RecordSet",
"@id": "fabea8a0-ee52-4a93-b960-4b037cedada1",
"name": "IntegrationTechniques",
"description": "",
"field": [
{
"@type": "cr:Field",
"@id": "151c6a71-eba3-49c5-a7cf-ab97d9b5dfd1",
"name": "id",
"description": "",
"dataType": "sc:Integer",
"source": {
"fileObject": {
"@id": "b45656dd-9319-4bd6-913a-521208074bf9"
},
"extract": {
"column": "id"
}
},
"sample": [
1822,
1828,
1967
]
},
{
"@type": "cr:Field",
"@id": "d3f9d776-cdd2-4ee5-91e9-c95f9f646d17",
"name": "topic",
"description": "",
"dataType": "sc:Text",
"source": {
"fileObject": {
"@id": "b45656dd-9319-4bd6-913a-521208074bf9"
},
"extract": {
"column": "topic"
}
},
"sample": [
"integration",
"integration",
"integration"
]
},
{
"@type": "cr:Field",
"@id": "0408236e-06e5-4dcd-9a50-22fea784eeec",
"name": "subtopic",
"description": "",
"dataType": "sc:Text",
"source": {
"fileObject": {
"@id": "b45656dd-9319-4bd6-913a-521208074bf9"
},
"extract": {
"column": "subtopic"
}
},
"sample": [
"integration techniques",
"integration techniques",
"integration techniques"
]
},
{
"@type": "cr:Field",
"@id": "3e250f6b-4132-49d8-8b79-96d2e1f1e794",
"name": "question",
"description": "",
"dataType": "sc:Text",
"source": {
"fileObject": {
"@id": "b45656dd-9319-4bd6-913a-521208074bf9"
},
"extract": {
"column": "question"
}
},
"sample": [
"Find $\\displaystyle\\int\\sin\\left(2x\\right)\\operatorname{e}^{\\sin^2\\left(x\\right)}\\, dx$",
"Find $\\displaystyle\\int \\frac{\\sqrt{x^2-9}}{x^3}\\: dx$",
"Find $\\displaystyle \\int 2x+2x^2-\\dfrac{1}{x}\\, dx$"
]
},
{
"@type": "cr:Field",
"@id": "f4192fbe-31dd-487d-b5cf-9a2e6ffdb3ac",
"name": "correct",
"description": "",
"dataType": "sc:Text",
"source": {
"fileObject": {
"@id": "b45656dd-9319-4bd6-913a-521208074bf9"
},
"extract": {
"column": "correct"
}
},
"sample": [
"$\\displaystyle \\dfrac{1}{2}\\ln\\left|\\ln \\left(x^2\\right)\\right|+c$",
"$\\displaystyle\\dfrac{x^4}{4}+\\dfrac{\\operatorname{e}^{3x}} {3} + c$\n",
"$\\displaystyle \\dfrac{x^3}{3}-x+\\ln \\left|x\\right|+c$"
]
},
{
"@type": "cr:Field",
"@id": "1fceabf9-5663-46f6-bb69-3e6d6fa8e0c0",
"name": "incorrect1",
"description": "",
"dataType": "sc:Text",
"source": {
"fileObject": {
"@id": "b45656dd-9319-4bd6-913a-521208074bf9"
},
"extract": {
"column": "incorrect1"
}
},
"sample": [
"$\\displaystyle -\\ln \\left|x+1\\right|+\\ln \\left|x-1\\right|-\\left(\\ln \\left|x^2-1\\right|-\\ln \\left(x\\right)\\right)+c$\n",
"$\\displaystyle 2\\ln \\left|x^2+1\\right|+\\arctan \\left(x\\right)+C$",
"$\\displaystyle\\frac{1}{2}\\sqrt{x^2+2x}-\\ln\\left|{x+1+\\sqrt{x^2+2x-1}}\\right|+c $"
]
},
{
"@type": "cr:Field",
"@id": "3473df3f-ebc8-4d8e-ad8e-b3f1ba3ad6df",
"name": "incorrect2",
"description": "",
"dataType": "sc:Text",
"source": {
"fileObject": {
"@id": "b45656dd-9319-4bd6-913a-521208074bf9"
},
"extract": {
"column": "incorrect2"
}
},
"sample": [
"$\\displaystyle \\dfrac{1}{2}\\left(x-\\sin(x)\\cos(x)\\right)+c$\n",
"$\\displaystyle \\tan\\left(\\frac{x}{2}\\right)+\\cos \\left(x\\right)+C$",
"$\\displaystyle \\frac{x^2}{2}+5\\ln|x|-x^2e^x+2xe^x-2e^x+C$"
]
},
{
"@type": "cr:Field",
"@id": "84463c80-f891-457d-b7b2-87b4d60edd42",
"name": "incorrect3",
"description": "",
"dataType": "sc:Text",
"source": {
"fileObject": {
"@id": "b45656dd-9319-4bd6-913a-521208074bf9"
},
"extract": {
"column": "incorrect3"
}
},
"sample": [
"$\\displaystyle\\dfrac{\\sec^5\\left(x\\right)}{5}-\\dfrac{\\sec ^3\\left(x\\right)}{3}+c$",
"$\\displaystyle \\dfrac{1}{4}\\arctan(\\operatorname{e}^{4x})+c$",
"$\\displaystyle 2\\ln \\left|x^2+1\\right|+\\frac{1}{2}\\arctan \\left(x\\right)+C$"
]
},
{
"@type": "cr:Field",
"@id": "37d04014-ef75-4499-a99c-e7536ac83967",
"name": "keywords",
"description": "",
"dataType": "sc:Text",
"source": {
"fileObject": {
"@id": "b45656dd-9319-4bd6-913a-521208074bf9"
},
"extract": {
"column": "keywords"
}
},
"sample": [
"101,103,107",
"103,107",
"102,105"
]
},
{
"@type": "cr:Field",
"@id": "580fa1b2-c88a-4a40-8052-278a138a1aef",
"name": "video_material",
"description": "",
"dataType": "sc:Text",
"source": {
"fileObject": {
"@id": "b45656dd-9319-4bd6-913a-521208074bf9"
},
"extract": {
"column": "video_material"
}
},
"sample": [
"539,540,541,542,630",
"539,540,541,542,630,753",
"539,541,542,545,620,621,630"
]
},
{
"@type": "cr:Field",
"@id": "1164b3a6-4d36-4aa1-aea8-0bf1348c58bd",
"name": "pdf_material",
"description": "",
"dataType": "sc:Text",
"source": {
"fileObject": {
"@id": "b45656dd-9319-4bd6-913a-521208074bf9"
},
"extract": {
"column": "pdf_material"
}
},
"sample": [
"130,131,135,137",
"129,131,137",
"130,131,135,137"
]
}
]
},
{
"@type": "cr:RecordSet",
"@id": "690fcc22-65b2-43c2-b5cc-1ae7a7c87500",
"name": "Videos",
"description": "",
"field": [
{
"@type": "cr:Field",
"@id": "1864c0a7-0996-4384-bb38-62d6e7546ce0",
"name": "id",
"description": "",
"dataType": "sc:Integer",
"source": {
"fileObject": {
"@id": "20d9223f-89f2-46a9-98f5-82abedb8aff4"
},
"extract": {
"column": "id"
}
},
"sample": [
485,
540,
620
]
},
{
"@type": "cr:Field",
"@id": "fc97432e-2558-469b-9892-a0e7ee7e91f2",
"name": "link",
"description": "",
"dataType": "sc:Text",
"source": {
"fileObject": {
"@id": "20d9223f-89f2-46a9-98f5-82abedb8aff4"
},
"extract": {
"column": "link"
}
},
"sample": [
"TyPRUjJ-Png",
"gyfDSkGrYuo",
"RjvYKVmV2EQ"
]
},
{
"@type": "cr:Field",
"@id": "74bd6479-c36f-4e4e-92ca-a4ec7f29d9dc",
"name": "keywords",
"description": "",
"dataType": "sc:Text",
"source": {
"fileObject": {
"@id": "20d9223f-89f2-46a9-98f5-82abedb8aff4"
},
"extract": {
"column": "keywords"
}
},
"sample": [
"103, 107",
"106",
"103, 107"
]
}
]
},
{
"@type": "cr:RecordSet",
"@id": "e81fa048-a43f-4368-b060-7630152759d5",
"name": "PDF",
"description": "",
"field": [
{
"@type": "dg:Document",
"@id": "df00478c-999e-4c15-936d-e132facd4ca8",
"contentUrl": "s3://datagems/dataset_id/PDF/135.pdf",
"name": "135",
"file_size_bytes": 167002,
"keywords": [
"Keyword 1",
"Keyword 2"
],
"summary": "This is an example summary",
"source": {
"fileSet": {
"@id": "b5d865d1-8c7b-4e6d-a545-b17c5e881363"
}
}
},
{
"@type": "dg:Document",
"@id": "4d4230e0-bb4d-4691-b785-4de76384a894",
"contentUrl": "s3://datagems/dataset_id/PDF/137.pdf",
"name": "137",
"file_size_bytes": 177015,
"keywords": [
"Keyword 1",
"Keyword 2"
],
"summary": "This is an example summary",
"source": {
"fileSet": {
"@id": "b5d865d1-8c7b-4e6d-a545-b17c5e881363"
}
}
},
{
"@type": "dg:Document",
"@id": "2c6c4e62-3f07-4d8c-968d-705bfbcbcad6",
"contentUrl": "s3://datagems/dataset_id/PDF/132.pdf",
"name": "132",
"file_size_bytes": 64697,
"keywords": [
"Keyword 1",
"Keyword 2"
],
"summary": "This is an example summary",
"source": {
"fileSet": {
"@id": "b5d865d1-8c7b-4e6d-a545-b17c5e881363"
}
}
},
{
"@type": "dg:Document",
"@id": "30a8669b-1e4c-4333-bea5-a3f866840b5b",
"contentUrl": "s3://datagems/dataset_id/PDF/134.pdf",
"name": "134",
"file_size_bytes": 38248,
"keywords": [
"Keyword 1",
"Keyword 2"
],
"summary": "This is an example summary",
"source": {
"fileSet": {
"@id": "b5d865d1-8c7b-4e6d-a545-b17c5e881363"
}
}
},
{
"@type": "dg:Document",
"@id": "bfc1a4a6-10d3-4561-89cb-972b80e84c36",
"contentUrl": "s3://datagems/dataset_id/PDF/138.pdf",
"name": "138",
"file_size_bytes": 243991,
"keywords": [
"Keyword 1",
"Keyword 2"
],
"summary": "This is an example summary",
"source": {
"fileSet": {
"@id": "b5d865d1-8c7b-4e6d-a545-b17c5e881363"
}
}
},
{
"@type": "dg:Document",
"@id": "a34c1eb2-1bf1-48f6-9a19-15751c30199f",
"contentUrl": "s3://datagems/dataset_id/PDF/133.pdf",
"name": "133",
"file_size_bytes": 70364,
"keywords": [
"Keyword 1",
"Keyword 2"
],
"summary": "This is an example summary",
"source": {
"fileSet": {
"@id": "b5d865d1-8c7b-4e6d-a545-b17c5e881363"
}
}
},
{
"@type": "dg:Document",
"@id": "5777d24c-ef5d-4343-821c-a837ef92a30c",
"contentUrl": "s3://datagems/dataset_id/PDF/130.pdf",
"name": "130",
"file_size_bytes": 67346,
"keywords": [
"Keyword 1",
"Keyword 2"
],
"summary": "This is an example summary",
"source": {
"fileSet": {
"@id": "b5d865d1-8c7b-4e6d-a545-b17c5e881363"
}
}
},
{
"@type": "dg:Document",
"@id": "781c7f10-b01a-47e3-92c6-4ceec74ef9b4",
"contentUrl": "s3://datagems/dataset_id/PDF/131.pdf",
"name": "131",
"file_size_bytes": 404904,
"keywords": [
"Keyword 1",
"Keyword 2"
],
"summary": "This is an example summary",
"source": {
"fileSet": {
"@id": "b5d865d1-8c7b-4e6d-a545-b17c5e881363"
}
}
},
{
"@type": "dg:Document",
"@id": "1d02ed2c-14f8-4fe9-abb1-ea92b1aa5028",
"contentUrl": "s3://datagems/dataset_id/PDF/129.pdf",
"name": "129",
"file_size_bytes": 86189,
"keywords": [
"Keyword 1",
"Keyword 2"
],
"summary": "This is an example summary",
"source": {
"fileSet": {
"@id": "b5d865d1-8c7b-4e6d-a545-b17c5e881363"
}
}
},
{
"@type": "dg:Document",
"@id": "7e9f886f-d71d-4d96-b374-422e51560dcf",
"contentUrl": "s3://datagems/dataset_id/PDF/136.pdf",
"name": "136",
"file_size_bytes": 95044,
"keywords": [
"Keyword 1",
"Keyword 2"
],
"summary": "This is an example summary",
"source": {
"fileSet": {
"@id": "b5d865d1-8c7b-4e6d-a545-b17c5e881363"
}
}
}
]
},
{
"@type": "cr:RecordSet",
"@id": "55c0df7f-1c47-4437-b350-0963c5694e6b",
"name": "txt",
"description": "",
"field": [
{
"@type": "dg:Document",
"@id": "ae78dc0a-66d1-4abf-9750-b5d00bf3119b",
"name": "MANIFEST",
"contentUrl": "s3://datagems/dataset_id/txt/MANIFEST.TXT",
"file_size_bytes": 602,
"summary": "This is an example summary",
"keywords": [
"Keyword 1",
"Keyword 2"
],
"source": {
"fileSet": {
"@id": "ca2073a1-cef2-4cb6-ab8e-7327042a080c"
}
}
},
{
"@type": "dg:Document",
"@id": "6b388d71-e73f-4cd5-ae3a-3dbd86d3c45d",
"name": "readme",
"contentUrl": "s3://datagems/dataset_id/txt/readme.txt",
"file_size_bytes": 1752,
"summary": "This is an example summary",
"keywords": [
"Keyword 1",
"Keyword 2"
],
"source": {
"fileSet": {
"@id": "ca2073a1-cef2-4cb6-ab8e-7327042a080c"
}
}
}
]
}
]
}