Skip to content

Commit

Permalink
Include entity type in AnVIL bundle FQIDs
Browse files: browse the repository at this point in the history
  • Loading branch information
nadove-ucsc committed Mar 18, 2023
1 parent 3eb1587 commit de9ffdc
Showing 1 changed file with 47 additions and 16 deletions.
63 changes: 47 additions & 16 deletions src/azul/plugins/repository/tdr_anvil/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@
defaultdict,
)
import datetime
from enum import (
Enum,
)
import logging
from operator import (
itemgetter,
Expand Down Expand Up @@ -111,6 +114,27 @@ def merge(cls, links: Iterable['Link']) -> 'Link':
Links = set[Link]


class BundleEntityType(Enum):
    """
    The types of entity that a bundle FQID in this plugin can refer to.

    Each member's value is the entity type name as a string. Elsewhere in this
    module that string is used to derive the name of the table to query and,
    with an ``_id`` suffix, the name of that table's primary key column.
    """
    # Currently the only member.
    primary: EntityType = 'biosample'


@attr.s(auto_attribs=True, frozen=True, kw_only=True)
class AnvilBundleFQID(TDRBundleFQID):
    """
    A TDR bundle FQID that additionally records the type of entity the bundle
    refers to.

    Instances are immutable and must be constructed with keyword arguments
    (``frozen=True, kw_only=True``).
    """
    # The kind of entity this bundle refers to.
    entity_type: BundleEntityType

    def fqid_json(self) -> MutableJSON:
        """
        Return the JSON form produced by the parent class, extended with an
        ``entity_type`` entry holding the string value of the enum member.
        """
        result = super().fqid_json()
        # Store the plain value rather than the Enum member so that the
        # result only contains JSON-compatible types.
        result['entity_type'] = self.entity_type.value
        return result

    @classmethod
    def from_json(cls, source: TDRSourceRef, json: JSON) -> 'AnvilBundleFQID':
        """
        Build an instance from the given source and a JSON object.

        :param source: the source the bundle belongs to

        :param json: a mapping with the keys ``bundle_uuid``,
                     ``bundle_version`` and ``entity_type``

        :raises KeyError: if any of those keys is missing

        :raises ValueError: if the ``entity_type`` entry is not the value of
                            any ``BundleEntityType`` member
        """
        return cls(source=source,
                   uuid=json['bundle_uuid'],
                   version=json['bundle_version'],
                   entity_type=BundleEntityType(json['entity_type']))


class TDRAnvilBundle(TDRBundle):
entity_type: EntityType = 'biosample'

Expand Down Expand Up @@ -180,7 +204,7 @@ def _parse_drs_uri(self, file_ref: Optional[str]) -> Optional[str]:
return self._parse_drs_path(file_ref)


class Plugin(TDRPlugin[TDRSourceSpec, TDRSourceRef, TDRBundleFQID]):
class Plugin(TDRPlugin[TDRSourceSpec, TDRSourceRef, AnvilBundleFQID]):

@cached_property
def _version(self):
Expand All @@ -196,24 +220,25 @@ def _version(self):
def _list_bundles(self,
source: TDRSourceRef,
prefix: str
) -> list[TDRBundleFQID]:
) -> list[AnvilBundleFQID]:
spec = source.spec
partition_prefix = spec.prefix.common + prefix
validate_uuid_prefix(partition_prefix)
entity_type = TDRAnvilBundle.entity_type
primary = BundleEntityType.primary.value
rows = self._run_sql(f'''
SELECT datarepo_row_id
FROM {backtick(self._full_table_name(spec, entity_type))}
SELECT datarepo_row_id, {primary!r} AS entity_type
FROM {backtick(self._full_table_name(spec, primary))}
WHERE STARTS_WITH(datarepo_row_id, '{partition_prefix}')
''')
return [
TDRBundleFQID(source=source,
# Reversibly tweak the entity UUID to prevent
# collisions between entity IDs and bundle IDs
uuid=uuids.change_version(row['datarepo_row_id'],
self.datarepo_row_uuid_version,
self.bundle_uuid_version),
version=self._version)
AnvilBundleFQID(source=source,
# Reversibly tweak the entity UUID to prevent
# collisions between entity IDs and bundle IDs
uuid=uuids.change_version(row['datarepo_row_id'],
self.datarepo_row_uuid_version,
self.bundle_uuid_version),
version=self._version,
entity_type=BundleEntityType(row['entity_type']))
for row in rows
]

Expand All @@ -226,7 +251,7 @@ def list_partitions(self,
for partition_prefix in prefix.partition_prefixes()
]
assert prefixes, prefix
entity_type = TDRAnvilBundle.entity_type
entity_type = BundleEntityType.primary.value
pk_column = entity_type + '_id'
rows = self._run_sql(f'''
SELECT prefix, COUNT({pk_column}) AS subgraph_count
Expand All @@ -236,7 +261,13 @@ def list_partitions(self,
''')
return {row['prefix']: row['subgraph_count'] for row in rows}

def _emulate_bundle(self, bundle_fqid: TDRBundleFQID) -> TDRAnvilBundle:
def _emulate_bundle(self, bundle_fqid: AnvilBundleFQID) -> TDRAnvilBundle:
    """
    Emulate the bundle identified by the given FQID, dispatching on the
    FQID's entity type.

    :param bundle_fqid: the FQID of the bundle to emulate

    :return: the bundle produced by the handler for the FQID's entity type

    :raises AssertionError: if the entity type has no handler
    """
    if bundle_fqid.entity_type is BundleEntityType.primary:
        return self._primary_bundle(bundle_fqid)
    # Raise explicitly instead of using `assert False`: assert statements
    # are removed when Python runs with -O, which would make this method
    # silently return None for an unhandled entity type.
    raise AssertionError(bundle_fqid.entity_type)

def _primary_bundle(self, bundle_fqid: AnvilBundleFQID) -> TDRAnvilBundle:
source = bundle_fqid.source
bundle_entity = self._bundle_entity(bundle_fqid)

Expand Down Expand Up @@ -277,13 +308,13 @@ def _emulate_bundle(self, bundle_fqid: TDRBundleFQID) -> TDRAnvilBundle:

return result

def _bundle_entity(self, bundle_fqid: TDRBundleFQID) -> KeyReference:
def _bundle_entity(self, bundle_fqid: AnvilBundleFQID) -> KeyReference:
source = bundle_fqid.source
bundle_uuid = bundle_fqid.uuid
entity_id = uuids.change_version(bundle_uuid,
self.bundle_uuid_version,
self.datarepo_row_uuid_version)
entity_type = TDRAnvilBundle.entity_type
entity_type = bundle_fqid.entity_type.value
pk_column = entity_type + '_id'
bundle_entity = one(self._run_sql(f'''
SELECT {pk_column}
Expand Down

0 comments on commit de9ffdc

Please sign in to comment.