Annotations¶
Bases: BaseCuration
Class to store and mutate annotations of samples to various attributes like tissues, diseases, sexes, ages, etc.
| Attributes: |
|
|---|
entities
property
¶
Returns term names of the Annotations frame.
Examples:
>>> anno = pl.DataFrame(
{
"series": ["GSE1", "GSE1", "GSE2"],
"sample": ["GSM1", "GSM2", "GSM3"],
"UBERON:0000948": [1, 0, 0],
"UBERON:0002349": [1, 1, 0],
"UBERON:0002113": [0, 0, 0],
"UBERON:0000955": [0, 0, 1],
}
)
>>> anno = Annotations.from_df(anno, index_col="sample", group_cols=["series"])
>>> anno.entities
['UBERON:0000955', 'UBERON:0002349', 'UBERON:0000948', 'UBERON:0002113']
groups
property
¶
Returns the groups column of the Annotations curation.
Examples:
>>> anno = pl.DataFrame(
{
"series": ["GSE1", "GSE1", "GSE2"],
"sample": ["GSM1", "GSM2", "GSM3"],
"UBERON:0000948": [1, 0, 0],
"UBERON:0002349": [1, 1, 0],
"UBERON:0002113": [0, 0, 0],
"UBERON:0000955": [0, 0, 1],
}
)
>>> anno = Annotations.from_df(anno, index_col="sample", group_cols=["series"])
>>> anno.groups
['GSE1', 'GSE1', 'GSE2']
ids
property
¶
Return the IDs dataframe.
Examples:
>>> anno = pl.DataFrame(
{
"series": ["GSE1", "GSE1", "GSE2"],
"sample": ["GSM1", "GSM2", "GSM3"],
"UBERON:0000948": [1, 0, 0],
"UBERON:0002349": [1, 1, 0],
"UBERON:0002113": [0, 0, 0],
"UBERON:0000955": [0, 0, 1],
}
)
>>> anno = Annotations.from_df(anno, index_col="sample", group_cols=["series"])
>>> anno.ids
┌────────┬────────┐
│ sample ┆ series │
│ --- ┆ --- │
│ str ┆ str │
╞════════╪════════╡
│ GSM1 ┆ GSE1 │
│ GSM2 ┆ GSE1 │
│ GSM3 ┆ GSE2 │
└────────┴────────┘
index
property
¶
Return the index column as a list.
Examples:
>>> anno = pl.DataFrame(
{
"series": ["GSE1", "GSE1", "GSE2"],
"sample": ["GSM1", "GSM2", "GSM3"],
"UBERON:0000948": [1, 0, 0],
"UBERON:0002349": [1, 1, 0],
"UBERON:0002113": [0, 0, 0],
"UBERON:0000955": [0, 0, 1],
}
)
>>> anno = Annotations.from_df(anno, index_col="sample", group_cols=["series"])
>>> anno.index
['GSM1', 'GSM2', 'GSM3']
n_indices
property
¶
Returns number of indices.
Examples:
>>> anno = pl.DataFrame(
{
"series": ["GSE1", "GSE1", "GSE2"],
"sample": ["GSM1", "GSM2", "GSM3"],
"UBERON:0000948": [1, 0, 0],
"UBERON:0002349": [1, 1, 0],
"UBERON:0002113": [0, 0, 0],
"UBERON:0000955": [0, 0, 1],
}
)
>>> anno = Annotations.from_df(anno, index_col="sample", group_cols=["series"])
>>> anno.n_indices
3
n_entities
property
¶
Returns number of entities.
Examples:
>>> anno = pl.DataFrame(
{
"series": ["GSE1", "GSE1", "GSE2"],
"sample": ["GSM1", "GSM2", "GSM3"],
"UBERON:0000948": [1, 0, 0],
"UBERON:0002349": [1, 1, 0],
"UBERON:0002113": [0, 0, 0],
"UBERON:0000955": [0, 0, 1],
}
)
>>> anno = Annotations.from_df(anno, index_col="sample", group_cols=["series"])
>>> anno.n_entities
4
unique_groups
property
¶
Returns unique groups.
Examples:
>>> anno = pl.DataFrame(
{
"series": ["GSE1", "GSE1", "GSE2"],
"sample": ["GSM1", "GSM2", "GSM3"],
"UBERON:0000948": [1, 0, 0],
"UBERON:0002349": [1, 1, 0],
"UBERON:0002113": [0, 0, 0],
"UBERON:0000955": [0, 0, 1],
}
)
>>> anno = Annotations.from_df(anno, index_col="sample", group_cols=["series"])
>>> anno.unique_groups
['GSE2', 'GSE1']
add_ids(new)
¶
Append new group ID columns to the IDs of an Annotations object. The new IDs must have a matching index.
| Parameters: |
|
|---|
| Returns: |
|
|---|
collapse(on, inplace=True)
¶
Collapses annotations on the specified grouping column.
| Parameters: |
|
|---|
drop(*args, **kwargs)
¶
Wrapper for polars drop. Drops any of the term columns. ID columns are not dropped through this method.
filter(condition)
¶
Filter both data and ids simultaneously using a mask.
| Parameters: |
|
|---|
Examples:
>>> from metahq_core.curations.annotations import Annotations
>>> anno = {
'sample': ['GSM1', 'GSM2', 'GSM3'],
'series': ['GSE1', 'GSE1', 'GSE2'],
'UBERON:0000948': [1, 0, 0],
'UBERON:0002113': [0, 1, 0],
'UBERON:0000955': [0, 0, 1],
}
>>> anno = Annotations.from_df(anno, index_col="sample", group_cols=["series"])
>>> anno.filter(pl.col("UBERON:0000948") == 1)
┌────────┬────────┬────────────────┬────────────────┬────────────────┐
│ sample ┆ series ┆ UBERON:0000948 ┆ UBERON:0002113 ┆ UBERON:0000955 │
│ --- ┆ --- ┆ --- ┆ --- ┆ --- │
│ str ┆ str ┆ i32 ┆ i32 ┆ i32 │
╞════════╪════════╪════════════════╪════════════════╪════════════════╡
│ GSM1 ┆ GSE1 ┆ 1 ┆ 0 ┆ 0 │
└────────┴────────┴────────────────┴────────────────┴────────────────┘
head(*args, **kwargs)
¶
Wrapper for polars head function.
save(outfile, fmt, attribute, level, citation_config, metadata=None)
¶
Save the annotations curation.
| Parameters: |
|
|---|
Examples:
If `metadata` is None, only the index column is saved
together with the annotation columns.
>>> from metahq_core.curations.annotations import Annotations
>>> from metahq_core.export.references import CitationConfig
>>> config = CitationConfig(
'1.0.1', 'tissue', 'sample', 'human', 'expert', 'rnaseq', 'annotate', '2026-04-20'
)
>>> anno = {
'sample': ['GSM1', 'GSM2', 'GSM3'],
'series': ['GSE1', 'GSE1', 'GSE2'],
'UBERON:0000948': [1, 0, 0],
'UBERON:0002113': [0, 1, 0],
'UBERON:0000955': [0, 0, 1],
}
>>> anno = Annotations.from_df(anno, index_col='sample', group_cols=['series'])
>>> anno.save(
'/path/to/out.parquet', fmt='parquet', attribute='tissue', level='sample',
citation_config=config,
)
sort_columns()
¶
Sorts term columns.
Examples:
>>> from metahq_core.curations.annotations import Annotations
>>> anno = {
'sample': ['GSM1', 'GSM2', 'GSM3'],
'series': ['GSE1', 'GSE1', 'GSE2'],
'UBERON:0000948': [1, 0, 0],
'UBERON:0002113': [0, 1, 0],
'UBERON:0000955': [0, 0, 1],
}
>>> anno = Annotations.from_df(anno, index_col="sample", group_cols=["series"])
>>> anno.sort_columns()
┌────────┬────────┬────────────────┬────────────────┬────────────────┐
│ series ┆ sample ┆ UBERON:0000948 ┆ UBERON:0000955 ┆ UBERON:0002113 │
│ --- ┆ --- ┆ --- ┆ --- ┆ --- │
│ str ┆ str ┆ i32 ┆ i32 ┆ i32 │
╞════════╪════════╪════════════════╪════════════════╪════════════════╡
│ GSE1 ┆ GSM1 ┆ 1 ┆ 0 ┆ 0 │
│ GSE1 ┆ GSM2 ┆ 0 ┆ 0 ┆ 1 │
│ GSE2 ┆ GSM3 ┆ 0 ┆ 1 ┆ 0 │
└────────┴────────┴────────────────┴────────────────┴────────────────┘
propagate(to_terms, ontology, mode, control_col='MONDO:0000000')
¶
Convert annotations to propagated labels.
Assigns propagated labels to terms given their annotations.
| Parameters: |
|
|---|
| Returns: |
|
|---|
Examples:
With `mode=0`:
>>> anno = pl.DataFrame(
{
"series": ["GSE1", "GSE1", "GSE2"],
"sample": ["GSM1", "GSM2", "GSM3"],
"UBERON:0000948": [1, 0, 0],
"UBERON:0002349": [1, 1, 0],
"UBERON:0002113": [0, 0, 0],
"UBERON:0000955": [0, 0, 1],
}
)
>>> anno = Annotations.from_df(anno, index_col="sample", group_cols=["series"])
>>> anno.propagate(to_terms=["UBERON:0000948"], ontology="uberon", mode=0)
┌────────┬────────┬────────────────┐
│ sample ┆ series ┆ UBERON:0000948 │
│ --- ┆ --- ┆ --- │
│ str ┆ str ┆ i32 │
╞════════╪════════╪════════════════╡
│ GSM1 ┆ GSE1 ┆ 1 │
│ GSM2 ┆ GSE1 ┆ 1 │
└────────┴────────┴────────────────┘
With `mode=1`:
>>> anno.propagate(to_terms=["UBERON:0000948"], ontology="uberon", mode=1)
┌────────┬────────┬────────────────┐
│ sample ┆ series ┆ UBERON:0000948 │
│ --- ┆ --- ┆ --- │
│ str ┆ str ┆ i32 │
╞════════╪════════╪════════════════╡
│ GSM1 ┆ GSE1 ┆ 1 │
│ GSM2 ┆ GSE1 ┆ 1 │
│ GSM3 ┆ GSE2 ┆ -1 │
└────────┴────────┴────────────────┘
select(*args, **kwargs)
¶
Select annotation columns while maintaining ids.
slice(offset, length=None)
¶
Slice both data and ids simultaneously using polars slice.
| Parameters: |
|
|---|
| Returns: |
|
|---|
from_df(df, index_col, sources_col, group_cols, **kwargs)
classmethod
¶
Creates an Annotations object from a combined DataFrame.
| Parameters: |
|
|---|
| Returns: |
|
|---|
Examples:
>>> from metahq_core.curations.annotations import Annotations
>>> anno = pl.DataFrame(
{
"series": ["GSE1", "GSE1", "GSE2"],
"sample": ["GSM1", "GSM2", "GSM3"],
"UBERON:0000948": [1, 0, 0],
"UBERON:0002349": [1, 1, 0],
"UBERON:0002113": [0, 0, 0],
"UBERON:0000955": [0, 0, 1],
}
)
>>> anno = Annotations.from_df(anno, index_col="sample", group_cols=["series"])
>>> anno
┌────────┬────────┬────────────────┬────────────────┬────────────────┬────────────────┐
│ sample ┆ series ┆ UBERON:0000948 ┆ UBERON:0002349 ┆ UBERON:0002113 ┆ UBERON:0000955 │
│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │
│ str ┆ str ┆ i64 ┆ i64 ┆ i64 ┆ i64 │
╞════════╪════════╪════════════════╪════════════════╪════════════════╪════════════════╡
│ GSM1 ┆ GSE1 ┆ 1 ┆ 1 ┆ 0 ┆ 0 │
│ GSM2 ┆ GSE1 ┆ 0 ┆ 1 ┆ 0 ┆ 0 │
│ GSM3 ┆ GSE2 ┆ 0 ┆ 0 ┆ 0 ┆ 1 │
└────────┴────────┴────────────────┴────────────────┴────────────────┴────────────────┘