Skip to content

Commit

Permalink
FHIR: Output as NPM package
Browse files Browse the repository at this point in the history
- Rename: OboGraphToFHIRConverter --> OboGraphToFhirJsonConverter
- Add: OboGraphToFhirNpmConverter: Saves in FHIR NPM package format.
- Add: New CLI output_type option: fhirnpm
- Add: StreamingFhirNpmWriter (WIP)
- Add: Test file: tests/input/fhir_npm_manifest_so.json
- Add: Test helper function: _load_and_convert_npm()
- Add: Unit test: test_convert_so_package()
- Update: .gitignore: tests/input/*_conf.json
  • Loading branch information
joeflack4 committed Sep 5, 2023
1 parent f7d3738 commit e99da6e
Show file tree
Hide file tree
Showing 8 changed files with 231 additions and 30 deletions.
4 changes: 2 additions & 2 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
.tox/
__pycache__/
.ipynb_checkpoints/
tests/output/
dist/
db/

Expand All @@ -25,7 +24,8 @@ notebooks/api-key.txt
.coverage.*
.coverage
coverage.*
tests/input/fhirjson_conf.json
tests/input/*_conf.json
tests/output/

oak_hp.profile
oak_semsimian_hp.profile
Expand Down
2 changes: 1 addition & 1 deletion docs/packages/converters/obo-graph-to-fhir.rst
Original file line number Diff line number Diff line change
Expand Up @@ -5,5 +5,5 @@ OBO Graph to FHIR Converter

.. currentmodule:: oaklib.converters.obo_graph_to_fhir_converter

.. autoclass:: OboGraphToFHIRConverter
.. autoclass:: OboGraphToFhirJsonConverter
:members:
10 changes: 8 additions & 2 deletions src/oaklib/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,10 @@
from oaklib.io.rollup_report_writer import write_report
from oaklib.io.streaming_axiom_writer import StreamingAxiomWriter
from oaklib.io.streaming_csv_writer import StreamingCsvWriter
from oaklib.io.streaming_fhir_writer import StreamingFHIRWriter
from oaklib.io.streaming_fhir_writer import (
StreamingFhirJsonWriter,
StreamingFhirNpmWriter,
)
from oaklib.io.streaming_info_writer import StreamingInfoWriter
from oaklib.io.streaming_json_writer import StreamingJsonWriter
from oaklib.io.streaming_kgcl_writer import StreamingKGCLWriter
Expand Down Expand Up @@ -208,6 +211,7 @@
NL_FORMAT = "nl"
KGCL_FORMAT = "kgcl"
FHIR_JSON_FORMAT = "fhirjson"
FHIR_NPM_FORMAT = "fhirnpm"
HEATMAP_FORMAT = "heatmap"

ONT_FORMATS = [
Expand All @@ -218,6 +222,7 @@
JSON_FORMAT,
YAML_FORMAT,
FHIR_JSON_FORMAT,
FHIR_NPM_FORMAT,
CSV_FORMAT,
NL_FORMAT,
]
Expand All @@ -234,7 +239,8 @@
JSONL_FORMAT: StreamingJsonWriter,
YAML_FORMAT: StreamingYamlWriter,
SSSOM_FORMAT: StreamingSssomWriter,
FHIR_JSON_FORMAT: StreamingFHIRWriter,
FHIR_JSON_FORMAT: StreamingFhirJsonWriter,
FHIR_NPM_FORMAT: StreamingFhirNpmWriter,
INFO_FORMAT: StreamingInfoWriter,
NL_FORMAT: StreamingNaturalLanguageWriter,
KGCL_FORMAT: StreamingKGCLWriter,
Expand Down
87 changes: 83 additions & 4 deletions src/oaklib/converters/obo_graph_to_fhir_converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,21 @@
- Updates issue: https://github.com/INCATools/ontology-access-kit/issues/369
- Conversion examples: https://drive.google.com/drive/folders/1lwGQ63_fedfWlGlRemq8OeZhZsvIXN01
"""
import json
import logging
import os
import shutil
import subprocess
import sys
import tarfile
import tempfile
from dataclasses import dataclass
from typing import Any, Dict, List, Tuple, Union

import rdflib
from linkml_runtime.dumpers import json_dumper
from sssom.parsers import parse_sssom_table
from sssom.writers import write_fhir_json

from oaklib.converters.data_model_converter import DataModelConverter
from oaklib.datamodels.fhir import (
Expand Down Expand Up @@ -52,7 +61,7 @@


@dataclass
class OboGraphToFHIRConverter(DataModelConverter):
class OboGraphToFhirJsonConverter(DataModelConverter):
"""Converts from OboGraph to FHIR.
- An ontology is mapped to a FHIR `CodeSystem <https://build.fhir.org/codesystem.html>`_.
Expand Down Expand Up @@ -86,7 +95,7 @@ def dump(
Dump an OBO Graph Document to a FHIR CodeSystem.
:param source: Source serialization.
:param target: Target serialization.
:param target: Target outpath.
:param kwargs: Additional keyword arguments passed to :ref:`convert`.
"""
cs = self.convert(
Expand Down Expand Up @@ -119,11 +128,11 @@ def convert(
To use:
>>> from oaklib.converters.obo_graph_to_fhir_converter import OboGraphToFHIRConverter
>>> from oaklib.converters.obo_graph_to_fhir_converter import OboGraphToFhirJsonConverter
>>> from oaklib.datamodels.obograph import GraphDocument
>>> from linkml_runtime.dumpers import json_dumper
>>> from linkml_runtime.loaders import json_loader
>>> converter = OboGraphToFHIRConverter()
>>> converter = OboGraphToFhirJsonConverter()
>>> graph = json_loader.load("tests/input/hp_test.json", target_class=GraphDocument)
>>> code_system = converter.convert(graph)
>>> print(json_dumper.dumps(code_system))
Expand Down Expand Up @@ -209,6 +218,7 @@ def _convert_graph(
predicate_period_replacement: bool = False,
) -> CodeSystem:
target.id = source.id
target.version = source.meta.version
edges_by_subject = index_graph_edges_by_subject(source)
logging.info(f"Converting graph to obo: {source.id}, nodes={len(source.nodes)}")
self.predicates_to_export = set()
Expand Down Expand Up @@ -290,3 +300,72 @@ def _convert_meta(self, source: Node, concept: Concept):
value=synonym.val,
)
)


@dataclass
class OboGraphToFhirNpmConverter(OboGraphToFhirJsonConverter):
    """Converts an OBO Graph to a FHIR NPM package.

    Performs the same conversion as OboGraphToFhirJsonConverter, but additionally packages the
    resulting CodeSystem, a manifest and an index into a gzipped tarball.
    """

    def dump(
        self,
        source: GraphDocument,
        target: str,
        manifest_path: str,
        **kwargs,
    ) -> str:
        """
        Dump an OBO Graph Document to a FHIR NPM package (``.tgz``).

        The archive contains a single top-level ``package`` directory holding the CodeSystem JSON,
        a copy of the manifest saved as ``package.json``, and a generated ``.index.json``.

        :param source: Source serialization.
        :param target: Target directory to save the output.
        :param manifest_path: Path to a manifest JSON. Required fields: 'name', 'version', 'description', and 'author'.
          See: https://confluence.hl7.org/display/FHIR/NPM+Package+Specification
        :param kwargs: Additional keyword arguments passed to :ref:`convert`. Must include
          'code_system_id' and 'code_system_url', which are also used to name the output files and
          to populate the package index.
        :return: Path to the written ``.tgz`` package.
        :raises KeyError: If 'code_system_id' or 'code_system_url' is missing from kwargs.
        """
        cs = self.convert(
            source,
            **kwargs,
        )
        # Resolve the required kwargs before touching the filesystem so that a missing key cannot
        # leave a half-built package behind.
        code_system_id = kwargs["code_system_id"]
        code_system_url = kwargs["code_system_url"]

        cs_filename = "CodeSystem-" + code_system_id + ".json"
        # Build the archive name directly rather than via str.replace(), which would also rewrite
        # any ".json" occurring inside the code system id itself.
        outpath = os.path.join(target, "CodeSystem-" + code_system_id + ".tgz")

        # The context manager removes the staging directory even if a step below raises.
        with tempfile.TemporaryDirectory() as temp_dir:
            # Create directory structure
            package_dir = os.path.join(temp_dir, "package")
            os.mkdir(package_dir)

            # Save FHIR resources
            cs_str = json_dumper.dumps(cs, inject_type=False)
            with open(os.path.join(package_dir, cs_filename), "w", encoding="UTF-8") as f:
                f.write(cs_str)

            # Save manifest package.json
            shutil.copyfile(manifest_path, os.path.join(package_dir, "package.json"))

            # Create and save .index.json
            package_index = {
                "index-version": 1,
                "files": [
                    {
                        "filename": cs_filename,
                        "resourceType": "CodeSystem",
                        "id": code_system_id,
                        "url": code_system_url,
                        "version": cs.version,
                    },
                ],
            }
            with open(os.path.join(package_dir, ".index.json"), "w", encoding="UTF-8") as f:
                json.dump(package_index, f)

            # Save the gzipped tarball while the staging directory still exists
            with tarfile.open(outpath, "w:gz") as tar:
                tar.add(package_dir, arcname="package")

        return outpath
8 changes: 6 additions & 2 deletions src/oaklib/interfaces/dumper_interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,10 @@
from linkml_runtime.dumpers import json_dumper

from oaklib.converters.obo_graph_to_cx_converter import OboGraphToCXConverter
from oaklib.converters.obo_graph_to_fhir_converter import OboGraphToFHIRConverter
from oaklib.converters.obo_graph_to_fhir_converter import (
OboGraphToFhirJsonConverter,
OboGraphToFhirNpmConverter,
)
from oaklib.converters.obo_graph_to_obo_format_converter import (
OboGraphToOboFormatConverter,
)
Expand All @@ -18,7 +21,8 @@

OBOGRAPH_CONVERTERS = {
"obo": OboGraphToOboFormatConverter,
"fhirjson": OboGraphToFHIRConverter,
"fhirjson": OboGraphToFhirJsonConverter,
"fhirnpm": OboGraphToFhirNpmConverter,
"owl": OboGraphToRdfOwlConverter,
"turtle": OboGraphToRdfOwlConverter,
"rdf": OboGraphToRdfOwlConverter,
Expand Down
27 changes: 24 additions & 3 deletions src/oaklib/io/streaming_fhir_writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,15 @@

from linkml_runtime.dumpers import json_dumper

from oaklib.converters.obo_graph_to_fhir_converter import OboGraphToFHIRConverter
from oaklib.converters.obo_graph_to_fhir_converter import OboGraphToFhirJsonConverter
from oaklib.datamodels.obograph import GraphDocument
from oaklib.interfaces.obograph_interface import OboGraphInterface
from oaklib.io.streaming_writer import StreamingWriter
from oaklib.types import CURIE


@dataclass
class StreamingFHIRWriter(StreamingWriter):
class StreamingFhirJsonWriter(StreamingWriter):
"""
A writer that emits FHIR CodeSystem objects or Concept objects
"""
Expand All @@ -24,10 +24,31 @@ def emit_multiple(self, entities: Iterable[CURIE], **kwargs):
g = oi.extract_graph(list(entities), include_metadata=True)
gd = GraphDocument(graphs=[g])
logging.info(f"Converting {len(g.nodes)} nodes to OBO")
converter = OboGraphToFHIRConverter()
converter = OboGraphToFhirJsonConverter()
converter.curie_converter = oi.converter
code_system = converter.convert(gd)
logging.info(f"Writing {len(code_system.concept)} Concepts")
# TODO: Shouldn't this call OboGraphToFhirJsonConverter.dump() instead of serializing here?
self.file.write(json_dumper.dumps(code_system))
else:
super().emit_multiple(entities, **kwargs)


# TODO: Implement FHIR NPM package output; this writer is currently a work-in-progress stub.
@dataclass
class StreamingFhirNpmWriter(StreamingWriter):
    """
    Work-in-progress writer for FHIR NPM output.

    At present no package is produced: for OboGraph-capable interfaces the extracted graph
    document is only printed alongside a placeholder converter.
    """

    def emit_multiple(self, entities: Iterable[CURIE], **kwargs):
        """
        Extract a graph for the given entities and print it (placeholder behaviour).

        :param entities: CURIEs of the entities to extract.
        :param kwargs: Passed through to the parent implementation when the ontology
            interface is not an OboGraphInterface.
        """
        ontology = self.ontology_interface
        # Anything that cannot extract an OBO graph is delegated to the generic writer.
        if not isinstance(ontology, OboGraphInterface):
            super().emit_multiple(entities, **kwargs)
            return

        logging.info("Extracting graph")
        graph = ontology.extract_graph(list(entities), include_metadata=True)
        graph_doc = GraphDocument(graphs=[graph])
        logging.info(f"Converting {len(graph.nodes)} nodes to OBO")
        # Placeholder until an NPM converter is wired in.
        converter = None
        print(graph_doc, converter)
24 changes: 24 additions & 0 deletions tests/input/fhir_npm_manifest_so.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
{
"name": "sequence-ontology",
"version": "0.1.0",
"canonical": "http://purl.obolibrary.org/obo/so.owl",
"title": "Sequence Ontology",
"description": "The Sequence Ontology is a set of terms and relationships used to describe the features and attributes of biological sequence.",
"homepage": "http://www.sequenceontology.org/",
"keywords": [
"SO",
"Sequence Ontology"
],
"author": "TIMS",
"maintainers": [
{
"name": "Joe Flack",
"email": "[email protected]"
},
{
"name": "Shahim Essaid",
"email": "[email protected]"
}
],
"license": "MIT"
}
Loading

0 comments on commit e99da6e

Please sign in to comment.