Skip to content

Commit

Permalink
Include entity type in AnVIL bundle FQIDs
Browse files (browse the repository at this point in the history)
  • Loading branch information
nadove-ucsc committed Mar 18, 2023
1 parent 1e60508 commit 150f102
Showing 1 changed file with 40 additions and 14 deletions.
54 changes: 40 additions & 14 deletions src/azul/plugins/repository/tdr_anvil/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@
defaultdict,
)
import datetime
from enum import (
Enum,
)
import logging
from operator import (
itemgetter,
Expand Down Expand Up @@ -56,6 +59,28 @@

log = logging.getLogger(__name__)


class BundleEntityType(Enum):
    """
    The closed set of entity types that an `AnvilBundleFQID` can carry in its
    `entity_type` field.

    Looking a member up by value, e.g. ``BundleEntityType('biosample')``,
    yields the member itself, and any other value raises `ValueError`, as for
    every `Enum`.
    """
    # The member's value is the plain string that `AnvilBundleFQID.fqid_json`
    # writes under the `entity_type` key and that `AnvilBundleFQID.from_json`
    # reads back.
    biosample: EntityType = 'biosample'


@attr.s(auto_attribs=True, frozen=True, kw_only=True)
class AnvilBundleFQID(TDRBundleFQID):
    """
    A `TDRBundleFQID` that additionally records a `BundleEntityType`.

    Instances are frozen and must be constructed with keyword arguments only.
    """
    # The entity type associated with this bundle
    entity_type: BundleEntityType

    def fqid_json(self) -> MutableJSON:
        """
        Return the parent class's JSON representation of this FQID, extended
        with an `entity_type` entry holding the enum member's string value.
        """
        fqid = super().fqid_json()
        # Store the member's value rather than the member itself so that the
        # entry is a plain string
        entity_type_name = self.entity_type.value
        fqid['entity_type'] = entity_type_name
        return fqid

    @classmethod
    def from_json(cls, source: TDRSourceRef, json: JSON) -> 'AnvilBundleFQID':
        """
        Alternate constructor that builds an FQID for the given source from a
        JSON object.

        :param source: the source to associate with the new FQID

        :param json: a mapping with the keys `bundle_uuid`, `bundle_version`
                     and `entity_type`, the latter being the value of a
                     `BundleEntityType` member

        :raises KeyError: if any of the three keys is missing

        :raises ValueError: if `entity_type` is not the value of any
                            `BundleEntityType` member
        """
        # The entries are evaluated in the same order in which the fields are
        # passed to the constructor
        fields = dict(
            source=source,
            uuid=json['bundle_uuid'],
            version=json['bundle_version'],
            entity_type=BundleEntityType(json['entity_type']),
        )
        return cls(**fields)


# AnVIL snapshots do not use UUIDs for primary/foreign keys.
# This type alias helps us distinguish these keys from the document UUIDs,
# which are drawn from the `datarepo_row_id` column.
Expand Down Expand Up @@ -196,24 +221,25 @@ def _version(self):
def _list_bundles(self,
source: TDRSourceRef,
prefix: str
) -> list[TDRBundleFQID]:
) -> list[AnvilBundleFQID]:
spec = source.spec
partition_prefix = spec.prefix.common + prefix
validate_uuid_prefix(partition_prefix)
entity_type = TDRAnvilBundle.entity_type
entity_type = BundleEntityType.biosample
rows = self._run_sql(f'''
SELECT datarepo_row_id
FROM {backtick(self._full_table_name(spec, entity_type))}
FROM {backtick(self._full_table_name(spec, entity_type.value))}
WHERE STARTS_WITH(datarepo_row_id, '{partition_prefix}')
''')
return [
TDRBundleFQID(source=source,
# Reversibly tweak the entity UUID to prevent
# collisions between entity IDs and bundle IDs
uuid=uuids.change_version(row['datarepo_row_id'],
self.datarepo_row_uuid_version,
self.bundle_uuid_version),
version=self._version)
AnvilBundleFQID(source=source,
# Reversibly tweak the entity UUID to prevent
# collisions between entity IDs and bundle IDs
uuid=uuids.change_version(row['datarepo_row_id'],
self.datarepo_row_uuid_version,
self.bundle_uuid_version),
version=self._version,
entity_type=entity_type)
for row in rows
]

Expand All @@ -226,7 +252,7 @@ def list_partitions(self,
for partition_prefix in prefix.partition_prefixes()
]
assert prefixes, prefix
entity_type = TDRAnvilBundle.entity_type
entity_type = BundleEntityType.biosample.value
pk_column = entity_type + '_id'
rows = self._run_sql(f'''
SELECT prefix, COUNT({pk_column}) AS subgraph_count
Expand All @@ -236,7 +262,7 @@ def list_partitions(self,
''')
return {row['prefix']: row['subgraph_count'] for row in rows}

def _emulate_bundle(self, bundle_fqid: TDRBundleFQID) -> TDRAnvilBundle:
def _emulate_bundle(self, bundle_fqid: AnvilBundleFQID) -> TDRAnvilBundle:
source = bundle_fqid.source
bundle_entity = self._bundle_entity(bundle_fqid)

Expand Down Expand Up @@ -277,13 +303,13 @@ def _emulate_bundle(self, bundle_fqid: TDRBundleFQID) -> TDRAnvilBundle:

return result

def _bundle_entity(self, bundle_fqid: TDRBundleFQID) -> KeyReference:
def _bundle_entity(self, bundle_fqid: AnvilBundleFQID) -> KeyReference:
source = bundle_fqid.source
bundle_uuid = bundle_fqid.uuid
entity_id = uuids.change_version(bundle_uuid,
self.bundle_uuid_version,
self.datarepo_row_uuid_version)
entity_type = TDRAnvilBundle.entity_type
entity_type = bundle_fqid.entity_type.value
pk_column = entity_type + '_id'
bundle_entity = one(self._run_sql(f'''
SELECT {pk_column}
Expand Down

0 comments on commit 150f102

Please sign in to comment.