Skip to content

Commit

Permalink
Add working method
Browse files Browse the repository at this point in the history
  • Loading branch information
KaiWaldrant committed Jul 10, 2024
1 parent 3be418b commit 8fbc4d1
Show file tree
Hide file tree
Showing 2 changed files with 28 additions and 18 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -8,27 +8,30 @@ __merge__: ../../api/comp_method.yaml

# NOTE(review): this listing appears to show removed and added diff lines back
# to back without +/- markers. Where a key occurs twice in a row, the second
# occurrence looks like the value introduced by the commit — confirm against
# the repository before copying anything from here.

# A unique identifier for your component (required).
# May contain only lowercase letters and underscores.
name: my_method
name: logistic_regression

# Metadata for your component
info:
# A relatively short label, used when rendering visualisations (required)
label: My Method
label: Logistic Regression
# A one sentence summary of how this method works (required). Used when
# rendering summary tables.
summary: "FILL IN: A one sentence summary of this method."
summary: "Logistic Regression with 100-dimensional PCA coordinates estimates parameters for multivariate classification by minimizing cross entropy loss over cell type classes."
# A multi-line description of how this component works (required). Used
# when rendering reference documentation.
description: |
FILL IN: A (multi-line) description of how this method works.
Logistic Regression estimates parameters of a logistic function for
multivariate classification tasks. Here, we use 100-dimensional whitened PCA
coordinates as independent variables, and the model minimises the cross
entropy loss over all cell type classes.
# Which normalisation method this component prefers to use (required).
preferred_normalization: log_cp10k
# A reference key from the bibtex library at src/common/library.bib (required).
reference: bibtex_reference_key
reference: "hosmer2013applied"
# URL to the documentation for this method (required).
documentation_url: https://url.to/the/documentation
documentation_url: "https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LogisticRegression.html"
# URL to the code repository for this method (required).
repository_url: https://github.com/organisation/repository
repository_url: https://github.com/scikit-learn/scikit-learn

# Component-specific parameters (optional)
# arguments:
Expand All @@ -49,17 +52,17 @@ resources:
# NOTE(review): removed and added diff lines appear back to back in this
# listing (e.g. the two `image:` and two `label:` entries); the second of each
# pair looks like the value introduced by the commit — confirm before reuse.
engines:
# Specifications for the Docker image for this component.
- type: docker
image: ghcr.io/openproblems-bio/base_python:1.0.4
image: ghcr.io/openproblems-bio/base_images/python:1.1.0
# Add custom dependencies here (optional). For more information, see
# https://viash.io/reference/config/engines/docker/#setup .
# setup:
# - type: python
# packages: scib==1.1.5
# Installs scikit-learn into the image; the accompanying script imports
# sklearn.linear_model.
# NOTE(review): scikit-learn is unpinned here, whereas the commented example
# above pins a version — consider pinning for reproducible builds.
setup:
- type: python
packages: scikit-learn

runners:
# This runner allows running the component natively as an executable.
- type: executable
# Allows turning the component into a Nextflow module / pipeline.
- type: nextflow
# Nextflow directives for this component. The labels presumably select
# time/memory/CPU resource tiers — confirm against the project's label
# definitions.
directives:
label: [midtime,midmem,midcpu]
label: [midtime,midmem,lowcpu]
Original file line number Diff line number Diff line change
@@ -1,37 +1,44 @@
import anndata as ad
import sklearn.linear_model

# NOTE(review): this listing appears to interleave removed and added diff
# lines without +/- markers (e.g. the two 'name' entries in `meta` with no
# comma between them), so it is not valid Python as shown — confirm the
# post-commit contents against the repository.
## VIASH START
# Note: this section is auto-generated by viash at runtime. To edit it, make changes
# in config.vsh.yaml and then run `viash config inject config.vsh.yaml`.
# `par` maps parameter names to file paths: the script below reads
# 'input_train' and 'input_test' and writes its result to 'output'.
# The values here point at test resources; presumably they are development
# defaults that viash replaces at runtime — confirm.
par = {
'train_h5ad': 'resources_test/task_template/pancreas/train_h5ad.h5ad',
'input_train': 'resources_test/task_template/pancreas/train.h5ad',
'input_test': 'resources_test/task_template/pancreas/test.h5ad',
'output': 'output.h5ad'
}
# `meta` carries component metadata; meta['name'] is stored in the output
# under uns['method_id'].
meta = {
'name': 'my_method'
'name': 'logistic_regression'
}
## VIASH END

# Main script body: read the train/test AnnData files, fit a logistic
# regression classifier on the training "X_pca" embedding, predict labels for
# the test set, and write the predictions to the output file.
# NOTE(review): this listing appears to interleave removed and added diff
# lines without +/- markers (e.g. the `train_h5ad` read, the duplicate
# 'dataset_id' entry, and the unclosed `layers={` below), so it is not valid
# Python as shown — confirm the post-commit contents against the repository.
print('Reading input files', flush=True)
train_h5ad = ad.read_h5ad(par['train_h5ad'])
input_train = ad.read_h5ad(par['input_train'])
input_test = ad.read_h5ad(par['input_test'])

print('Preprocess data', flush=True)
# ... preprocessing ...
# No preprocessing is performed here; the model consumes the "X_pca"
# embedding already stored in the input files.

print('Train model', flush=True)
# ... train model ...
# The classifier is constructed without arguments, so the library defaults
# apply. Training labels are cast to str before fitting.
classifier = sklearn.linear_model.LogisticRegression()
classifier.fit(input_train.obsm["X_pca"], input_train.obs["label"].astype(str))

print('Generate predictions', flush=True)
# ... generate predictions ...
# Predict labels from the test set's "X_pca" embedding.
obs_label_pred = classifier.predict(input_test.obsm["X_pca"])

print("Write output AnnData to file", flush=True)
# The output carries identifying metadata in `uns` (dataset and normalization
# ids copied from the training data, method id from meta['name']) and the
# predicted labels in `obs` under 'label_pred'.
# NOTE(review): `layers_prediction` is not defined anywhere in the visible
# script; the `layers={...}` lines look like leftover template code removed
# by the commit — confirm.
output = ad.AnnData(
uns={
'dataset_id': train_h5ad.uns['dataset_id'],
'dataset_id': input_train.uns['dataset_id'],
'normalization_id': input_train.uns['normalization_id'],
'method_id': meta['name']
},
layers={
'prediction': layers_prediction
obs={
'label_pred': obs_label_pred
}
)
# Write the result, gzip-compressed, to the path given by par['output'].
output.write_h5ad(par['output'], compression='gzip')

0 comments on commit 8fbc4d1

Please sign in to comment.