Querying Process Graphs
ProcessCore query methods let you query a process graph starting from either the whole dataset or a specific material/data node. This walkthrough loads the proteomics assay example and follows provenance from the final data back to the experimental conditions.
Show source YAML
type: Dataset
additionalType: Assay
identifier: measurement1
creators:
-
type: Person
givenName: Oliver
affiliation:
type: Organization
name: RPTU University of Kaiserslautern
email: mailto:maus@nfdi4plants.org
familyName: Maus
jobTitles:
"@id": http://purl.org/spar/scoro/research-assistant
"@type": DefinedTerm
name: research assistant
termCode: http://purl.org/spar/scoro/research-assistant
labProtocols:
-
"@id": "#Protocol_Growth"
type: LabProtocol
labEquipments:
"@id": "#Component_growth_environment_bioreactor"
-
"@id": "#Protocol_Cell_Lysis"
type: LabProtocol
labEquipments:
"@id": "#Component_centrifuge_Eppendorf™_Centrifuge_5420"
-
"@id": "#Protocol_MS_Run"
type: LabProtocol
labEquipments:
"@id": "#Component_mass_spectrometer_Q_Exactive_9000"
-
"@id": "#Protocol_Computational_Proteome_Analysis"
type: LabProtocol
propertyValues:
-
"@id": "#ParameterValue_sonicator_Fisherbrand_Model_705_Sonic_Dismembrator"
type: PropertyValue
additionalType: ParameterValue # inherits from PropertyValue
name: sonicator
nameTAN: https://bioregistry.io/OBI:0400114
value: Fisherbrand™ Model 705 Sonic Dismembrator
valueTAN: https://bioregistry.io/OBI:5453453
-
"@id": "#ParameterValue_time_10_minute"
type: PropertyValue
additionalType: ParameterValue
name: time
nameTAN: https://bioregistry.io/PATO:0000165
value: 10
unit: minute
unitTAN: https://bioregistry.io/UO:0000031
-
"@id": "#ParameterValue_technical_replicate_group_1"
type: PropertyValue
additionalType: ParameterValue
name: technical replicate group
nameTAN: https://bioregistry.io/DPBO:1000184
value: 1
-
"@id": "#ParameterValue_technical_replicate_group_2"
type: PropertyValue
additionalType: ParameterValue
name: technical replicate group
nameTAN: https://bioregistry.io/DPBO:1000184
value: 2
-
"@id": "#ParameterValue_technical_replicate_group_3"
type: PropertyValue
additionalType: ParameterValue
name: technical replicate group
nameTAN: https://bioregistry.io/DPBO:1000184
value: 3
-
"@id": "#ParameterValue_software_ProteomIqon"
type: PropertyValue
additionalType: ParameterValue
name: software
nameTAN: https://bioregistry.io/IAO:0000010
value: ProteomIQon
-
"@id": "#CharacteristicValue_organism_Arabidopsis_thaliana"
type: PropertyValue
additionalType: CharacteristicValue
name: organism
nameTAN: https://bioregistry.io/SIO:010000
value: Arabidopsis thaliana
valueTAN: https://bioregistry.io/NCBITaxon:3702
-
"@id": "#FactorValue_temperature_25_degree_Celsius"
type: PropertyValue
additionalType: FactorValue
name: temperature
nameTAN: https://bioregistry.io/NCRO:0000029
value: 25
unit: degree Celsius
unitTAN: https://bioregistry.io/UO:0000027
-
"@id": "#FactorValue_temperature_30_degree_Celsius"
type: PropertyValue
additionalType: FactorValue
name: temperature
nameTAN: https://bioregistry.io/NCRO:0000029
value: 30
unit: degree Celsius
unitTAN: https://bioregistry.io/UO:0000027
-
"@id": "#Component_growth_environment_bioreactor"
type: PropertyValue
additionalType: Component
name: growth environment
nameTAN: https://bioregistry.io/OBI:0000997
value: bioreactor
valueTAN: https://bioregistry.io/OBI:0001046
-
"@id": "#Component_mass_spectrometer_Q_Exactive_9000"
type: PropertyValue
additionalType: Component
name: mass spectrometer
nameTAN: https://bioregistry.io/OBI:0000049
value: Q Exactive 9000
processes:
-
# Possible worksheet grouping
type: LabProcess
name: Growth
inputs:
- type: Material # = additionalType: [Source]
additionalType: Source
name: Base Culture
additionalProperty:
-
"@id": "#CharacteristicValue_organism_Arabidopsis_thaliana"
outputs:
- type: Material
additionalType: Sample
name: Cultivation Flask RT
additionalProperty:
-
"@id": "#FactorValue_temperature_25_degree_Celsius"
executesProtocol:
"@id": "#Protocol_Growth"
-
type: LabProcess
name: Growth
inputs:
- type: Material
additionalType: Source
name: Base Culture
additionalProperty:
-
"@id": "#CharacteristicValue_organism_Arabidopsis_thaliana"
outputs:
- type: Material
additionalType: Sample
name: Cultivation Flask HT
additionalProperty:
-
"@id": "#FactorValue_temperature_30_degree_Celsius"
executesProtocol:
"@id": "#Protocol_Growth"
-
type: LabProcess
name: Cell Lysis
inputs:
- type: Material
additionalType: Source # = additionalType: [Source]
name: Cultivation Flask RT
outputs:
- type: Material
additionalType: Sample
name: Eppi RT 1
executesProtocol:
"@id": "#Protocol_Cell_Lysis"
parameterValue:
- "@id": "#ParameterValue_time_10_minute"
- "@id": "#ParameterValue_sonicator_Fisherbrand_Model_705_Sonic_Dismembrator"
- "@id": "#ParameterValue_technical_replicate_group_1"
-
type: LabProcess
name: Cell Lysis
inputs:
- type: Material # = additionalType: [Source]
additionalType: Source
name: Cultivation Flask RT
outputs:
- type: Material
additionalType: Sample
name: Eppi RT 2
executesProtocol:
"@id": "#Protocol_Cell_Lysis"
parameterValue:
- "@id": "#ParameterValue_time_10_minute"
- "@id": "#ParameterValue_sonicator_Fisherbrand_Model_705_Sonic_Dismembrator"
- "@id": "#ParameterValue_technical_replicate_group_2"
-
type: LabProcess
name: Cell Lysis
inputs:
- type: Material # = additionalType: [Source]
additionalType: Source
name: Cultivation Flask RT
outputs:
- type: Material
additionalType: Sample
name: Eppi RT 3
executesProtocol:
"@id": "#Protocol_Cell_Lysis"
parameterValue:
- "@id": "#ParameterValue_time_10_minute"
- "@id": "#ParameterValue_sonicator_Fisherbrand_Model_705_Sonic_Dismembrator"
- "@id": "#ParameterValue_technical_replicate_group_3"
-
type: LabProcess
name: Cell Lysis
inputs:
- type: Material # = additionalType: [Source]
additionalType: Source
name: Cultivation Flask HT
outputs:
- type: Material
additionalType: Sample
name: Eppi HT 1
executesProtocol:
"@id": "#Protocol_Cell_Lysis"
parameterValue:
- "@id": "#ParameterValue_time_10_minute"
- "@id": "#ParameterValue_sonicator_Fisherbrand_Model_705_Sonic_Dismembrator"
- "@id": "#ParameterValue_technical_replicate_group_1"
-
type: LabProcess
name: Cell Lysis
inputs:
- type: Material # = additionalType: [Source]
additionalType: Source
name: Cultivation Flask HT
outputs:
- type: Material
additionalType: Sample
name: Eppi HT 2
executesProtocol:
"@id": "#Protocol_Cell_Lysis"
parameterValue:
- "@id": "#ParameterValue_time_10_minute"
- "@id": "#ParameterValue_sonicator_Fisherbrand_Model_705_Sonic_Dismembrator"
- "@id": "#ParameterValue_technical_replicate_group_2"
-
type: LabProcess
name: Cell Lysis
inputs:
- type: Material # = additionalType: [Source]
additionalType: Source
name: Cultivation Flask HT
outputs:
- type: Material
additionalType: Sample
name: Eppi HT 3
executesProtocol:
"@id": "#Protocol_Cell_Lysis"
parameterValue:
- "@id": "#ParameterValue_time_10_minute"
- "@id": "#ParameterValue_sonicator_Fisherbrand_Model_705_Sonic_Dismembrator"
- "@id": "#ParameterValue_technical_replicate_group_3"
-
type: LabProcess
name: MS Run
inputs:
- type: Material
additionalType: Sample
name: Eppi RT 1
outputs:
- type: Data
path: sample1.raw
executesProtocol:
"@id": "#Protocol_MS_Run"
-
type: LabProcess
name: MS Run
inputs:
- type: Material
additionalType: Sample
name: Eppi RT 2
outputs:
- type: Data
path: sample2.raw
executesProtocol:
"@id": "#Protocol_MS_Run"
-
type: LabProcess
name: MS Run
inputs:
- type: Material
additionalType: Sample
name: Eppi RT 3
outputs:
- type: Data
path: sample3.raw
executesProtocol:
"@id": "#Protocol_MS_Run"
-
type: LabProcess
name: MS Run
inputs:
- type: Material
additionalType: Sample
name: Eppi HT 1
outputs:
- type: Data
path: sample4.raw
executesProtocol:
"@id": "#Protocol_MS_Run"
-
type: LabProcess
name: MS Run
inputs:
- type: Material
additionalType: Sample
name: Eppi HT 2
outputs:
- type: Data
path: sample5.raw
executesProtocol:
"@id": "#Protocol_MS_Run"
-
type: LabProcess
name: MS Run
inputs:
- type: Material
additionalType: Sample
name: Eppi HT 3
outputs:
- type: Data
path: sample6.raw
executesProtocol:
"@id": "#Protocol_MS_Run"
-
type: LabProcess
name: Computational Proteome Analysis
inputs:
- type: Data
path: sample1.raw
outputs:
- type: Data
path: "proteomics_result.csv#col=12"
encodingFormat: text/csv
usageInfo: https://datatracker.ietf.org/doc/html/rfc7111
executesProtocol:
"@id": "#Protocol_Computational_Proteome_Analysis"
parameterValue:
"@id": "#ParameterValue_software_ProteomIqon"
-
type: LabProcess
name: Computational Proteome Analysis
inputs:
- type: Data
path: sample2.raw
outputs:
- type: Data
path: "proteomics_result.csv#col=13"
encodingFormat: text/csv
usageInfo: https://datatracker.ietf.org/doc/html/rfc7111
executesProtocol:
"@id": "#Protocol_Computational_Proteome_Analysis"
parameterValue:
"@id": "#ParameterValue_software_ProteomIqon"
-
type: LabProcess
name: Computational Proteome Analysis
inputs:
- type: Data
path: sample3.raw
outputs:
- type: Data
path: "proteomics_result.csv#col=14"
encodingFormat: text/csv
usageInfo: https://datatracker.ietf.org/doc/html/rfc7111
executesProtocol:
"@id": "#Protocol_Computational_Proteome_Analysis"
parameterValue:
"@id": "#ParameterValue_software_ProteomIqon"
-
type: LabProcess
name: Computational Proteome Analysis
inputs:
- type: Data
path: sample4.raw
outputs:
- type: Data
path: "proteomics_result.csv#col=15"
encodingFormat: text/csv
usageInfo: https://datatracker.ietf.org/doc/html/rfc7111
executesProtocol:
"@id": "#Protocol_Computational_Proteome_Analysis"
parameterValue:
"@id": "#ParameterValue_software_ProteomIqon"
-
type: LabProcess
name: Computational Proteome Analysis
inputs:
- type: Data
path: sample5.raw
outputs:
- type: Data
path: "proteomics_result.csv#col=16"
encodingFormat: text/csv
usageInfo: https://datatracker.ietf.org/doc/html/rfc7111
executesProtocol:
"@id": "#Protocol_Computational_Proteome_Analysis"
parameterValue:
"@id": "#ParameterValue_software_ProteomIqon"
-
type: LabProcess
name: Computational Proteome Analysis
inputs:
- type: Data
path: sample6.raw
outputs:
- type: Data
path: "proteomics_result.csv#col=17"
encodingFormat: text/csv
usageInfo: https://datatracker.ietf.org/doc/html/rfc7111
executesProtocol:
"@id": "#Protocol_Computational_Proteome_Analysis"
parameterValue:
"@id": "#ParameterValue_software_ProteomIqon"
additionalProperty:
- # = generalProperty: (measurement type)
type: PropertyValue
name: variableMeasured
nameTAN: https://schema.org/variableMeasured
value: proteomics
valueTAN: https://bioregistry.io/MS:1003348
// Parse the YAML text held in ymlString into the dataset used by every query below.
// NOTE(review): the meaning of the `false` argument is not visible here — confirm against the fromYamlString signature.
let myAssay = ProcessCore.Yaml.Dataset.fromYamlString false ymlString
The example stores protocol references by id. For the protocol-name filter below, mirror the process name into the protocol name when the YAML did not provide one.
// Give every protocol that has no name the name of the process executing it,
// so that filtering by protocol name has something to match on.
for proc in myAssay.Processes do
    match proc.ExecutesProtocol with
    | Some protocol when protocol.Name.IsNone ->
        protocol.Name <- Some proc.Name
    | _ -> ()
Dataset-Level Discovery
Start by asking what is in the dataset. The dataset helpers AllProcesses, AllMaterials, AllData, and AllNodes also include the contents of nested datasets.
// Label/count pairs summarising what the dataset contains.
let datasetOverview =
    let processCount = myAssay.AllProcesses().Count
    let materialCount = myAssay.AllMaterials().Count
    let dataCount = myAssay.AllData().Count
    let rootCount = myAssay.RootNodes().Count
    let finalCount = myAssay.FinalNodes().Count
    [ ("processes", processCount)
      ("materials", materialCount)
      ("data", dataCount)
      ("root nodes", rootCount)
      ("final nodes", finalCount) ]
datasetOverview
|
// Keys of the dataset's root nodes, as a list.
let rootNodes =
    [ for node in myAssay.RootNodes() -> node.Key() ]
rootNodes
|
// Keys of the dataset's final nodes, as a list.
let finalNodes =
    myAssay.FinalNodes()
    |> List.ofSeq
    |> List.map (fun node -> node.Key())
finalNodes
|
Node-Centered Traversal
Pick one final result file and inspect the graph around it.
// First data entry whose path mentions the proteomics result file.
// Raises KeyNotFoundException when no entry matches.
let resultData =
    myAssay.AllData()
    |> Seq.pick (fun file ->
        if file.Path.Contains("proteomics_result.csv") then Some file else None)
// Label/text pairs describing the graph neighbourhood of the selected result file.
let resultContext =
    let resultNode = DataNode resultData
    let upstreamCount = myAssay.NodesUpstreamOf(resultNode).Count
    let downstreamCount = myAssay.NodesDownstreamOf(resultNode).Count
    let pathCount = myAssay.PathsThrough(resultNode).Count
    [ ("path", resultData.Path)
      ("upstream nodes", string upstreamCount)
      ("downstream nodes", string downstreamCount)
      ("paths through result", string pathCount) ]
resultContext
|
// Keys of every node upstream of the selected result file.
let upstreamNodeKeys =
    [ for node in myAssay.NodesUpstreamOf(DataNode resultData) -> node.Key() ]
upstreamNodeKeys
|
Property-value queries collect annotations from process parameters, input/output node properties, and protocol components.
// Distinct "name=value" labels for the property values found upstream of the
// selected result file, in order of first occurrence.
let upstreamPropertyValues =
    myAssay.UpstreamPropertyValuesForNode(DataNode resultData)
    |> List.ofSeq
    |> List.map (fun annotation -> annotation.Name + "=" + annotation.ValueWithUnitText)
    |> List.distinct
upstreamPropertyValues
|
Composable Queries
Plain F# sequence operations compose with graph traversal. This predicate selects the 25 °C growth temperature condition, one of the two temperature conditions (25 °C and 30 °C) used in the example.
// True only for a property value named "temperature" whose value text is "25"
// and whose unit text is "degree Celsius". Checks run in that order and stop at
// the first mismatch.
let is25Degrees (pv: PropertyValue) =
    if pv.NameText <> "temperature" then false
    elif pv.ValueText <> "25" then false
    else pv.UnitText = "degree Celsius"
// Paths of all data entries that have a 25 degree Celsius temperature
// annotation somewhere in their upstream property values.
let dataWith25DegreeHistory =
    [ for data in myAssay.AllData() do
        let history = myAssay.UpstreamPropertyValuesForNode(DataNode data)
        if Seq.exists is25Degrees history then
            yield data.Path ]
dataWith25DegreeHistory
|
Protocol-name filters narrow property collection to processes whose executed protocol has the given name.
// Paths of the proteomics result entries whose upstream property values,
// restricted to the "Growth" protocol, contain the 25 degree Celsius condition.
let resultPathsFrom25DegreeGrowth =
    [ for data in myAssay.AllData() do
        if data.Path.Contains("proteomics_result.csv") then
            let growthValues =
                data.UpstreamPropertyValues(protocolName = "Growth", scope = myAssay.AllProcesses())
            if Seq.exists is25Degrees growthValues then
                yield data.Path ]
resultPathsFrom25DegreeGrowth
|
What To Use When
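| Task | API |
|---|---|
| Count or list dataset contents | `AllProcesses`, `AllMaterials`, `AllData`, `AllNodes` |
| Find terminal sources and sinks | `RootNodes`, `FinalNodes` |
| Walk from a node | `NodesUpstreamOf`, `NodesDownstreamOf` |
| Collect annotations around a node | `UpstreamPropertyValuesForNode`, `UpstreamPropertyValues` |
| Work inside one dataset only | Dataset-scoped helpers such as `myAssay.NodesUpstreamOf(node)` and `myAssay.UpstreamPropertyValuesForNode(node)` |
| Ask path questions | `PathsThrough` |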
<summary>Performs operations on <see cref="T:System.String" /> instances that contain file or directory path information. These operations are performed in a cross-platform manner.</summary>
System.IO.Path.Combine( paths: string array) : string
System.IO.Path.Combine(path1: string, path2: string) : string
System.IO.Path.Combine(path1: string, path2: string, path3: string) : string
System.IO.Path.Combine(path1: string, path2: string, path3: string, path4: string) : string
<summary>Provides static methods for the creation, copying, deletion, moving, and opening of a single file, and aids in the creation of <see cref="T:System.IO.FileStream" /> objects.</summary>
System.IO.File.ReadAllText(path: string, encoding: System.Text.Encoding) : string
<summary>Provides methods for encoding and decoding URLs when processing Web requests.</summary>
System.Net.WebUtility.HtmlEncode(value: string, output: System.IO.TextWriter) : unit
System.String.Contains(value: char) : bool
System.String.Contains(value: string, comparisonType: System.StringComparison) : bool
System.String.Contains(value: char, comparisonType: System.StringComparison) : bool
val string: value: 'T -> string
--------------------
type string = System.String
type PropertyValue = inherit DynamicObj new: name: string * ?value: string * ?unit: string * ?nameTAN: string * ?valueTAN: string * ?unitTAN: string * ?additionalType: string * ?instanceOf: FormalParameter -> PropertyValue override Equals: obj: obj -> bool override GetHashCode: unit -> int member AdditionalType: string option with get, set member InstanceOf: FormalParameter option with get, set member Name: string with get, set member NameTAN: string option with get, set member NameText: string member Unit: string option with get, set ...
<summary> Extensible key-value-unit triple. Primary extension mechanism of ProcessCore. schema.org/PropertyValue </summary>
--------------------
new: name: string * ?value: string * ?unit: string * ?nameTAN: string * ?valueTAN: string * ?unitTAN: string * ?additionalType: string * ?instanceOf: FormalParameter -> PropertyValue
ProcessCore