Reading And Writing YAML
ProcessCore.Yaml reads YAML documents into the same in-memory graph objects provided by ProcessCore, and writes those objects back out as YAML.
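Load a profile-shaped assay example. The first argument of `ProcessCore.Yaml.Dataset.fromYamlString` selects the parsing mode: passing `false` means lenient mode, in which type decorations and extra profile fields are accepted and preserved where possible.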
// Text of the bundled profile-shaped proteomics assay example.
// The path is resolved relative to the directory that contains this script.
let assayYaml =
    let examplePath =
        System.IO.Path.Combine(__SOURCE_DIRECTORY__, "../../examples/isa/assay_proteomics.yml")

    System.IO.File.ReadAllText examplePath
Show assay YAML
type: Dataset
additionalType: Assay
identifier: measurement1
creators:
-
type: Person
givenName: Oliver
affiliation:
type: Organization
name: RPTU University of Kaiserslautern
email: mailto:maus@nfdi4plants.org
familyName: Maus
jobTitles:
"@id": http://purl.org/spar/scoro/research-assistant
"@type": DefinedTerm
name: research assistant
termCode: http://purl.org/spar/scoro/research-assistant
labProtocols:
-
"@id": "#Protocol_Growth"
type: LabProtocol
labEquipments:
"@id": "#Component_growth_environment_bioreactor"
-
"@id": "#Protocol_Cell_Lysis"
type: LabProtocol
labEquipments:
"@id": "#Component_centrifuge_Eppendorf™_Centrifuge_5420"
-
"@id": "#Protocol_MS_Run"
type: LabProtocol
labEquipments:
"@id": "#Component_mass_spectrometer_Q_Exactive_9000"
-
"@id": "#Protocol_Computational_Proteome_Analysis"
type: LabProtocol
propertyValues:
-
"@id": "#ParameterValue_sonicator_Fisherbrand_Model_705_Sonic_Dismembrator"
type: PropertyValue
additionalType: ParameterValue # inherits from PropertyValue
name: sonicator
nameTAN: https://bioregistry.io/OBI:0400114
value: Fisherbrand™ Model 705 Sonic Dismembrator
valueTAN: https://bioregistry.io/OBI:5453453
-
"@id": "#ParameterValue_time_10_minute"
type: PropertyValue
additionalType: ParameterValue
name: time
nameTAN: https://bioregistry.io/PATO:0000165
value: 10
unit: minute
unitTAN: https://bioregistry.io/UO:0000031
-
"@id": "#ParameterValue_technical_replicate_group_1"
type: PropertyValue
additionalType: ParameterValue
name: technical replicate group
nameTAN: https://bioregistry.io/DPBO:1000184
value: 1
-
"@id": "#ParameterValue_technical_replicate_group_2"
type: PropertyValue
additionalType: ParameterValue
name: technical replicate group
nameTAN: https://bioregistry.io/DPBO:1000184
value: 2
-
"@id": "#ParameterValue_technical_replicate_group_3"
type: PropertyValue
additionalType: ParameterValue
name: technical replicate group
nameTAN: https://bioregistry.io/DPBO:1000184
value: 3
-
"@id": "#ParameterValue_software_ProteomIqon"
type: PropertyValue
additionalType: ParameterValue
name: software
nameTAN: https://bioregistry.io/IAO_0000010
value: ProteomIQon
-
"@id": "#CharacteristicValue_organism_Arabidopsis_thaliana"
type: PropertyValue
additionalType: CharacteristicValue
name: organism
nameTAN: https://bioregistry.io/SIO:010000
value: Arabidopsis thaliana
valueTAN: https://bioregistry.io/NCBITaxon:3702
-
"@id": "#FactorValue_temperature_25_degree_Celsius"
type: PropertyValue
additionalType: FactorValue
name: temperature
nameTAN: https://bioregistry.io/NCRO:0000029
value: 25
unit: degree Celsius
unitTAN: https://bioregistry.io/UO:0000027
-
"@id": "#FactorValue_temperature_30_degree_Celsius"
type: PropertyValue
additionalType: FactorValue
name: temperature
nameTAN: https://bioregistry.io/NCRO:0000029
value: 30
unit: degree Celsius
unitTAN: https://bioregistry.io/UO:0000027
-
"@id": "#Component_growth_environment_bioreactor"
type: PropertyValue
additionalType: Component
name: growth environment
nameTAN: https://bioregistry.io/OBI:0000997
value: bioreactor
valueTAN: https://bioregistry.io/OBI:0001046
-
"@id": "#Component_mass_spectrometer_Q_Exactive_9000"
type: PropertyValue
additionalType: Component
name: mass spectrometer
nameTAN: https://bioregistry.io/OBI:0000049
value: Q Exactive 9000
processes:
-
# Possible worksheet grouping
type: LabProcess
name: Growth
inputs:
- type: Material # = additionalType: [Source]
additionalType: Source
name: Base Culture
additionalProperty:
-
"@id": "#CharacteristicValue_organism_Arabidopsis_thaliana"
outputs:
- type: Material
additionalType: Sample
name: Cultivation Flask RT
additionalProperty:
-
"@id": "#FactorValue_temperature_25_degree_Celsius"
executesProtocol:
"@id": "#Protocol_Growth"
-
type: LabProcess
name: Growth
inputs:
- type: Material
additionalType: Source
name: Base Culture
additionalProperty:
-
"@id": "#CharacteristicValue_organism_Arabidopsis_thaliana"
outputs:
- type: Material
additionalType: Sample
name: Cultivation Flask HT
additionalProperty:
-
"@id": "#FactorValue_temperature_30_degree_Celsius"
executesProtocol:
"@id": "#Protocol_Growth"
-
type: LabProcess
name: Cell Lysis
inputs:
- type: Material
additionalType: Source # = additionalType: [Source]
name: Cultivation Flask RT
outputs:
- type: Material
additionalType: Sample
name: Eppi RT 1
executesProtocol:
"@id": "#Protocol_Cell_Lysis"
parameterValue:
- "@id": "#ParameterValue_time_10_minute"
- "@id": "#ParameterValue_sonicator_Fisherbrand_Model_705_Sonic_Dismembrator"
- "@id": "#ParameterValue_technical_replicate_group_1"
-
type: LabProcess
name: Cell Lysis
inputs:
- type: Material # = additionalType: [Source]
additionalType: Source
name: Cultivation Flask RT
outputs:
- type: Material
additionalType: Sample
name: Eppi RT 2
executesProtocol:
"@id": "#Protocol_Cell_Lysis"
parameterValue:
- "@id": "#ParameterValue_time_10_minute"
- "@id": "#ParameterValue_sonicator_Fisherbrand_Model_705_Sonic_Dismembrator"
- "@id": "#ParameterValue_technical_replicate_group_2"
-
type: LabProcess
name: Cell Lysis
inputs:
- type: Material # = additionalType: [Source]
additionalType: Source
name: Cultivation Flask RT
outputs:
- type: Material
additionalType: Sample
name: Eppi RT 3
executesProtocol:
"@id": "#Protocol_Cell_Lysis"
parameterValue:
- "@id": "#ParameterValue_time_10_minute"
- "@id": "#ParameterValue_sonicator_Fisherbrand_Model_705_Sonic_Dismembrator"
- "@id": "#ParameterValue_technical_replicate_group_3"
-
type: LabProcess
name: Cell Lysis
inputs:
- type: Material # = additionalType: [Source]
additionalType: Source
name: Cultivation Flask HT
outputs:
- type: Material
additionalType: Sample
name: Eppi HT 1
executesProtocol:
"@id": "#Protocol_Cell_Lysis"
parameterValue:
- "@id": "#ParameterValue_time_10_minute"
- "@id": "#ParameterValue_sonicator_Fisherbrand_Model_705_Sonic_Dismembrator"
- "@id": "#ParameterValue_technical_replicate_group_1"
-
type: LabProcess
name: Cell Lysis
inputs:
- type: Material # = additionalType: [Source]
additionalType: Source
name: Cultivation Flask HT
outputs:
- type: Material
additionalType: Sample
name: Eppi HT 2
executesProtocol:
"@id": "#Protocol_Cell_Lysis"
parameterValue:
- "@id": "#ParameterValue_time_10_minute"
- "@id": "#ParameterValue_sonicator_Fisherbrand_Model_705_Sonic_Dismembrator"
- "@id": "#ParameterValue_technical_replicate_group_2"
-
type: LabProcess
name: Cell Lysis
inputs:
- type: Material # = additionalType: [Source]
additionalType: Source
name: Cultivation Flask HT
outputs:
- type: Material
additionalType: Sample
name: Eppi HT 3
executesProtocol:
"@id": "#Protocol_Cell_Lysis"
parameterValue:
- "@id": "#ParameterValue_time_10_minute"
- "@id": "#ParameterValue_sonicator_Fisherbrand_Model_705_Sonic_Dismembrator"
- "@id": "#ParameterValue_technical_replicate_group_3"
-
type: LabProcess
name: MS Run
inputs:
- type: Material
additionalType: Sample
name: Eppi RT 1
outputs:
- type: Data
path: sample1.raw
executesProtocol:
"@id": "#Protocol_MS_Run"
-
type: LabProcess
name: MS Run
inputs:
- type: Material
additionalType: Sample
name: Eppi RT 2
outputs:
- type: Data
path: sample2.raw
executesProtocol:
"@id": "#Protocol_MS_Run"
-
type: LabProcess
name: MS Run
inputs:
- type: Material
additionalType: Sample
name: Eppi RT 3
outputs:
- type: Data
path: sample3.raw
executesProtocol:
"@id": "#Protocol_MS_Run"
-
type: LabProcess
name: MS Run
inputs:
- type: Material
additionalType: Sample
name: Eppi HT 1
outputs:
- type: Data
path: sample4.raw
executesProtocol:
"@id": "#Protocol_MS_Run"
-
type: LabProcess
name: MS Run
inputs:
- type: Material
additionalType: Sample
name: Eppi HT 2
outputs:
- type: Data
path: sample5.raw
executesProtocol:
"@id": "#Protocol_MS_Run"
-
type: LabProcess
name: MS Run
inputs:
- type: Material
additionalType: Sample
name: Eppi HT 3
outputs:
- type: Data
path: sample6.raw
executesProtocol:
"@id": "#Protocol_MS_Run"
-
type: LabProcess
name: Computational Proteome Analysis
inputs:
- type: Data
path: sample1.raw
outputs:
- type: Data
path: "proteomics_result.csv#col=12"
encodingFormat: text/csv
usageInfo: https://datatracker.ietf.org/doc/html/rfc7111
executesProtocol:
"@id": "#Protocol_Computational_Proteome_Analysis"
parameterValue:
"@id": "#ParameterValue_software_ProteomIqon"
-
type: LabProcess
name: Computational Proteome Analysis
inputs:
- type: Data
path: sample2.raw
outputs:
- type: Data
path: "proteomics_result.csv#col=13"
encodingFormat: text/csv
usageInfo: https://datatracker.ietf.org/doc/html/rfc7111
executesProtocol:
"@id": "#Protocol_Computational_Proteome_Analysis"
parameterValue:
"@id": "#ParameterValue_software_ProteomIqon"
-
type: LabProcess
name: Computational Proteome Analysis
inputs:
- type: Data
path: sample3.raw
outputs:
- type: Data
path: "proteomics_result.csv#col=14"
encodingFormat: text/csv
usageInfo: https://datatracker.ietf.org/doc/html/rfc7111
executesProtocol:
"@id": "#Protocol_Computational_Proteome_Analysis"
parameterValue:
"@id": "#ParameterValue_software_ProteomIqon"
-
type: LabProcess
name: Computational Proteome Analysis
inputs:
- type: Data
path: sample4.raw
outputs:
- type: Data
path: "proteomics_result.csv#col=15"
encodingFormat: text/csv
usageInfo: https://datatracker.ietf.org/doc/html/rfc7111
executesProtocol:
"@id": "#Protocol_Computational_Proteome_Analysis"
parameterValue:
"@id": "#ParameterValue_software_ProteomIqon"
-
type: LabProcess
name: Computational Proteome Analysis
inputs:
- type: Data
path: sample5.raw
outputs:
- type: Data
path: "proteomics_result.csv#col=16"
encodingFormat: text/csv
usageInfo: https://datatracker.ietf.org/doc/html/rfc7111
executesProtocol:
"@id": "#Protocol_Computational_Proteome_Analysis"
parameterValue:
"@id": "#ParameterValue_software_ProteomIqon"
-
type: LabProcess
name: Computational Proteome Analysis
inputs:
- type: Data
path: sample6.raw
outputs:
- type: Data
path: "proteomics_result.csv#col=17"
encodingFormat: text/csv
usageInfo: https://datatracker.ietf.org/doc/html/rfc7111
executesProtocol:
"@id": "#Protocol_Computational_Proteome_Analysis"
parameterValue:
"@id": "#ParameterValue_software_ProteomIqon"
additionalProperty:
- # = generalProperty: (measurement type)
type: PropertyValue
name: variableMeasured
nameTAN: https://schema.org/variableMeasured
value: proteomics
valueTAN: https://bioregistry.io/MS:1003348
// Lenient parse (first argument `false`): profile decorations and extra fields are accepted.
let assay = assayYaml |> ProcessCore.Yaml.Dataset.fromYamlString false

// Key/value summary of the parsed dataset; the trailing expression displays it.
let assayShape =
    let identifier = assay.Identifier
    // A dataset without an additional type is reported as an empty string.
    let additionalType = defaultArg assay.AdditionalType ""
    let processCount = string assay.Processes.Count
    let dataNodeCount = string (assay.AllData().Count)

    [ ("identifier", identifier)
      ("additionalType", additionalType)
      ("processes", processCount)
      ("data nodes", dataNodeCount) ]

assayShape
|
Strict mode (passing `true` instead of `false`) is useful for core-shaped YAML. The same ISA/profile-shaped example contains extra fields, so strict mode rejects it.
// Run the same parse in strict mode (first argument `true`) and turn the outcome
// into a message instead of letting an exception escape the snippet.
let strictModeResult =
    try
        // Only success or failure matters here; the parsed dataset is discarded.
        let _ = ProcessCore.Yaml.Dataset.fromYamlString true assayYaml
        "Strict mode accepted this YAML."
    with rejection ->
        // NOTE(review): `firstLine` is defined outside this snippet; presumably it
        // shortens the message to its first line - confirm against its definition.
        let reason = firstLine rejection.Message
        "Strict mode rejected this YAML: " + reason

strictModeResult
|
Writing can use inline objects or top-level indexes. Inline YAML is easy to inspect. Indexed YAML deduplicates repeated property values and protocols into propertyValues and labProtocols sections.
// Build a small dataset by hand so both YAML writers can be compared on the same graph.
let small = Dataset("yaml-demo")
// Protocol named "Growth" with one lab-equipment property value tagged as a Component.
let protocol = LabProtocol()
protocol.Name <- Some "Growth"
protocol.AddLabEquipment(PropertyValue("growth chamber", value = "chamber-1", additionalType = "Component"))
// Input material: tagged "Source" and annotated with an organism CharacteristicValue.
let source = Material("Seedling")
source.AdditionalType <- Some "Source"
source.AddAdditionalProperty(PropertyValue("organism", value = "Arabidopsis thaliana", additionalType = "CharacteristicValue"))
// Output material: tagged "Sample" and annotated with a temperature FactorValue (value plus unit).
let sample = Material("Leaf sample")
sample.AdditionalType <- Some "Sample"
sample.AddAdditionalProperty(PropertyValue("temperature", value = "25", unit = "degree Celsius", additionalType = "FactorValue"))
// Process linking source -> sample, executing the protocol above with one ParameterValue.
let growth = LabProcess("Growth")
growth.ExecutesProtocol <- Some protocol
growth.AddInputMaterial(source)
growth.AddOutputMaterial(sample)
growth.AddParameterValue(PropertyValue("duration", value = "7", unit = "day", additionalType = "ParameterValue"))
// Attach the process to the dataset; this is the graph both writers serialize.
small.AddProcess(growth)
// Inline writer: nested objects are written where they are used.
// NOTE(review): `Some 2` is presumably the indentation width - confirm against the ProcessCore.Yaml API.
let inlineYaml = ProcessCore.Yaml.Dataset.toYamlString (Some 2) small
// Indexed writer: property values and protocols go into top-level sections and are referenced by "@id".
let indexedYaml = ProcessCore.Yaml.Dataset.toYamlStringIndexed (Some 2) small
Show inline YAML
type: Dataset
identifier: yaml-demo
processes:
-
type: LabProcess
name: Growth
inputs:
-
type: Material
name: Seedling
additionalType: Source
additionalProperty:
-
type: PropertyValue
name: organism
additionalType: CharacteristicValue
value: Arabidopsis thaliana
outputs:
-
type: Material
name: Leaf sample
additionalType: Sample
additionalProperty:
-
type: PropertyValue
name: temperature
additionalType: FactorValue
value: 25
unit: degree Celsius
executesProtocol:
type: LabProtocol
name: Growth
labEquipment:
-
type: PropertyValue
name: growth chamber
additionalType: Component
value: chamber-1
parameterValue:
-
type: PropertyValue
name: duration
additionalType: ParameterValue
value: 7
unit: day
Show indexed YAML
type: Dataset
identifier: yaml-demo
labProtocols:
-
type: LabProtocol
name: Growth
labEquipment:
-
"@id": "#Component_growth_chamber_chamber-1"
"@id": "#Protocol_Growth"
propertyValues:
-
type: PropertyValue
name: organism
additionalType: CharacteristicValue
value: Arabidopsis thaliana
"@id": "#CharacteristicValue_organism_Arabidopsis_thaliana"
-
type: PropertyValue
name: temperature
additionalType: FactorValue
value: 25
unit: degree Celsius
"@id": "#FactorValue_temperature_25_degree_Celsius"
-
type: PropertyValue
name: duration
additionalType: ParameterValue
value: 7
unit: day
"@id": "#ParameterValue_duration_7_day"
-
type: PropertyValue
name: growth chamber
additionalType: Component
value: chamber-1
"@id": "#Component_growth_chamber_chamber-1"
processes:
-
type: LabProcess
name: Growth
inputs:
-
type: Material
name: Seedling
additionalType: Source
additionalProperty:
-
"@id": "#CharacteristicValue_organism_Arabidopsis_thaliana"
outputs:
-
type: Material
name: Leaf sample
additionalType: Sample
additionalProperty:
-
"@id": "#FactorValue_temperature_25_degree_Celsius"
executesProtocol:
"@id": "#Protocol_Growth"
parameterValue:
-
"@id": "#ParameterValue_duration_7_day"
Round-tripping returns a new object graph with the same logical shape.
// Parse the inline YAML back in strict mode (first argument `true`), producing a new object graph.
let roundTripped = inlineYaml |> ProcessCore.Yaml.Dataset.fromYamlString true

// Summary of the round-tripped graph; the trailing expression displays it.
let roundTripShape =
    let identifier = roundTripped.Identifier
    let processCount = string roundTripped.Processes.Count
    let materialCount = string (roundTripped.AllMaterials().Count)
    let propertyValueCount = string (roundTripped.AllPropertyValues().Count)

    [ ("identifier", identifier)
      ("processes", processCount)
      ("materials", materialCount)
      ("property values", propertyValueCount) ]

roundTripShape
|
What To Use When
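| Task | API |
|---|---|
| Read profile-shaped YAML | `ProcessCore.Yaml.Dataset.fromYamlString false` |
| Read strict core-shaped YAML | `ProcessCore.Yaml.Dataset.fromYamlString true` |
| Write inline YAML | `ProcessCore.Yaml.Dataset.toYamlString` |
| Write indexed YAML | `ProcessCore.Yaml.Dataset.toYamlStringIndexed` |
| Decode a specific type | |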
val string: value: 'T -> string
--------------------
type string = String
<summary>Provides methods for encoding and decoding URLs when processing Web requests.</summary>
Net.WebUtility.HtmlEncode(value: string, output: IO.TextWriter) : unit
(+0 other overloads)
String.Split( separator: char array) : string array
(+0 other overloads)
String.Split(separator: string array, options: StringSplitOptions) : string array
(+0 other overloads)
String.Split(separator: string, ?options: StringSplitOptions) : string array
(+0 other overloads)
String.Split(separator: char array, options: StringSplitOptions) : string array
(+0 other overloads)
String.Split(separator: char array, count: int) : string array
(+0 other overloads)
String.Split(separator: char, ?options: StringSplitOptions) : string array
(+0 other overloads)
String.Split(separator: string array, count: int, options: StringSplitOptions) : string array
(+0 other overloads)
String.Split(separator: string, count: int, ?options: StringSplitOptions) : string array
(+0 other overloads)
String.Split(separator: char array, count: int, options: StringSplitOptions) : string array
(+0 other overloads)
<summary>Specifies options for applicable <see cref="Overload:System.String.Split" /> method overloads, such as whether to omit empty substrings from the returned array or trim whitespace from substrings.</summary>
<summary>Performs operations on <see cref="T:System.String" /> instances that contain file or directory path information. These operations are performed in a cross-platform manner.</summary>
IO.Path.Combine( paths: string array) : string
IO.Path.Combine(path1: string, path2: string) : string
IO.Path.Combine(path1: string, path2: string, path3: string) : string
IO.Path.Combine(path1: string, path2: string, path3: string, path4: string) : string
<summary>Provides static methods for the creation, copying, deletion, moving, and opening of a single file, and aids in the creation of <see cref="T:System.IO.FileStream" /> objects.</summary>
IO.File.ReadAllText(path: string, encoding: Text.Encoding) : string
<summary> Decoration discriminator (e.g. "Investigation", "Study", "Assay") </summary>
type Dataset = inherit DynamicObj new: identifier: string * ?name: string * ?description: string * ?additionalType: string * ?processes: LabProcess seq * ?hasPart: Dataset seq * ?additionalProperty: PropertyValue seq -> Dataset member AddAdditionalProperty: pv: PropertyValue -> unit member AddPart: child: Dataset -> unit member AddProcess: proc: LabProcess -> unit member AllConnectedNodes: node: IONode -> ResizeArray<IONode> member AllData: unit -> ResizeArray<Data> member AllMaterials: unit -> ResizeArray<Material> member AllNodes: unit -> ResizeArray<IONode> member AllProcesses: unit -> ResizeArray<LabProcess> ...
<summary> Container and context for data and processes. schema.org/Dataset </summary>
--------------------
new: identifier: string * ?name: string * ?description: string * ?additionalType: string * ?processes: LabProcess seq * ?hasPart: Dataset seq * ?additionalProperty: PropertyValue seq -> Dataset
type LabProtocol = inherit DynamicObj new: ?name: string * ?description: string * ?version: string * ?url: string * ?intendedUse: DefinedTerm * ?additionalType: string * ?parameters: FormalParameter seq * ?labEquipment: PropertyValue seq * ?additionalProperty: PropertyValue seq -> LabProtocol member AddAdditionalProperty: pv: PropertyValue -> unit member AddLabEquipment: pv: PropertyValue -> unit member AddParameter: fp: FormalParameter -> unit override Equals: obj: obj -> bool override GetHashCode: unit -> int member RemoveAdditionalProperty: pv: PropertyValue -> unit member RemoveLabEquipment: pv: PropertyValue -> unit member RemoveParameter: fp: FormalParameter -> unit ...
<summary> Description of a planned procedure. bioschemas.org/LabProtocol </summary>
--------------------
new: ?name: string * ?description: string * ?version: string * ?url: string * ?intendedUse: DefinedTerm * ?additionalType: string * ?parameters: FormalParameter seq * ?labEquipment: PropertyValue seq * ?additionalProperty: PropertyValue seq -> LabProtocol
type PropertyValue = inherit DynamicObj new: name: string * ?value: string * ?unit: string * ?nameTAN: string * ?valueTAN: string * ?unitTAN: string * ?additionalType: string * ?instanceOf: FormalParameter -> PropertyValue override Equals: obj: obj -> bool override GetHashCode: unit -> int member AdditionalType: string option with get, set member InstanceOf: FormalParameter option with get, set member Name: string with get, set member NameTAN: string option with get, set member NameText: string member Unit: string option with get, set ...
<summary> Extensible key-value-unit triple. Primary extension mechanism of ProcessCore. schema.org/PropertyValue </summary>
--------------------
new: name: string * ?value: string * ?unit: string * ?nameTAN: string * ?valueTAN: string * ?unitTAN: string * ?additionalType: string * ?instanceOf: FormalParameter -> PropertyValue
type Material = inherit DynamicObj new: name: string * ?additionalType: string * ?additionalProperty: PropertyValue seq -> Material member AddAdditionalProperty: pv: PropertyValue -> unit member AllConnectedNodes: ?scope: ResizeArray<LabProcess> -> ResizeArray<IONode> member AllConnectedProcesses: ?scope: ResizeArray<LabProcess> -> ResizeArray<LabProcess> member AllPropertyValues: ?scope: ResizeArray<LabProcess> -> ResizeArray<PropertyValue> member ConnectedData: ?scope: ResizeArray<LabProcess> -> ResizeArray<Data> member ConnectedMaterials: ?scope: ResizeArray<LabProcess> -> ResizeArray<Material> member DownstreamData: ?scope: ResizeArray<LabProcess> -> ResizeArray<Data> member DownstreamMaterials: ?scope: ResizeArray<LabProcess> -> ResizeArray<Material> ...
<summary> Input or output biological, chemical, or digital material in the process graph. bioschemas.org/Sample </summary>
--------------------
new: name: string * ?additionalType: string * ?additionalProperty: PropertyValue seq -> Material
<summary> Decoration discriminator (e.g. "Sample", "Source") </summary>
type LabProcess = inherit DynamicObj new: name: string * ?executesProtocol: LabProtocol * ?additionalType: string * ?inputs: IONode seq * ?outputs: IONode seq * ?parameterValue: PropertyValue seq -> LabProcess member AddInput: node: IONode -> unit member AddInputData: d: Data -> unit member AddInputMaterial: m: Material -> unit member AddOutput: node: IONode -> unit member AddOutputData: d: Data -> unit member AddOutputMaterial: m: Material -> unit member AddParameterValue: pv: PropertyValue -> unit member CanonicalizeAllNodes: ds: Dataset -> unit ...
<summary> Core transformation node. Connects inputs to outputs via a protocol. bioschemas.org/LabProcess </summary>
--------------------
new: name: string * ?executesProtocol: LabProtocol * ?additionalType: string * ?inputs: IONode seq * ?outputs: IONode seq * ?parameterValue: PropertyValue seq -> LabProcess
ProcessCore