Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

warn annotation loss with resolution change #977

Merged
merged 7 commits into from
Feb 13, 2024
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,19 @@ exports[`runQC action Dispatches events properly 2`] = `

exports[`runQC action Dispatches status error if loading fails 1`] = `[]`;

exports[`runQC action Runs only the embedding if only changed filter was configureEmbedding 1`] = `
exports[`runQC action Runs only clustering if only changed filter was clusteringSettings 1`] = `
[
{
"payload": {},
"type": "experimentSettings/discardChangedQCFilters",
},
{
"type": "cellSets/clusteringUpdating",
},
]
`;

exports[`runQC action Runs only the embedding if only changed filter was embeddingSettings 1`] = `
[
{
"payload": {},
Expand Down
35 changes: 31 additions & 4 deletions src/__test__/redux/actions/pipelines/runQC.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@ import {
EXPERIMENT_SETTINGS_DISCARD_CHANGED_QC_FILTERS,
} from 'redux/actionTypes/experimentSettings';

import { CELL_SETS_CLUSTERING_UPDATING } from 'redux/actionTypes/cellSets';

import { EMBEDDINGS_LOADING } from 'redux/actionTypes/embeddings';

import { runQC } from 'redux/actions/pipeline';
Expand Down Expand Up @@ -46,6 +48,7 @@ const initialState = {
},
},
},
cellSets: {},
backendStatus: {
[experimentId]: {
status: {
Expand Down Expand Up @@ -102,15 +105,15 @@ describe('runQC action', () => {
expect(actions).toMatchSnapshot();
});

it('Runs only the embedding if only changed filter was configureEmbedding', async () => {
it('Runs only the embedding if only changed filter was embeddingSettings', async () => {
fetchMock.resetMocks();

saveProcessingSettings.mockImplementation(() => () => Promise.resolve());

const onlyConfigureEmbeddingChangedState = _.cloneDeep(initialState);
onlyConfigureEmbeddingChangedState.experimentSettings.processing.meta.changedQCFilters = new Set(['configureEmbedding']);
const onlyEmbeddingSettingsChangedState = _.cloneDeep(initialState);
onlyEmbeddingSettingsChangedState.experimentSettings.processing.meta.changedQCFilters = new Set(['embeddingSettings']);

const store = mockStore(onlyConfigureEmbeddingChangedState);
const store = mockStore(onlyEmbeddingSettingsChangedState);
await store.dispatch(runQC(experimentId));

await waitForActions(
Expand All @@ -125,4 +128,28 @@ describe('runQC action', () => {

expect(actions).toMatchSnapshot();
});

it('Runs only clustering if only changed filter was clusteringSettings', async () => {
fetchMock.resetMocks();

saveProcessingSettings.mockImplementation(() => () => Promise.resolve());

const onlyClusteringSettingsChangedState = _.cloneDeep(initialState);
onlyClusteringSettingsChangedState.experimentSettings.processing.meta.changedQCFilters = new Set(['clusteringSettings']);

const store = mockStore(onlyClusteringSettingsChangedState);
await store.dispatch(runQC(experimentId));

await waitForActions(
store,
[EXPERIMENT_SETTINGS_DISCARD_CHANGED_QC_FILTERS, CELL_SETS_CLUSTERING_UPDATING],
);

const actions = store.getActions();

expect(actions[0].type).toEqual(EXPERIMENT_SETTINGS_DISCARD_CHANGED_QC_FILTERS);
expect(actions[1].type).toEqual(CELL_SETS_CLUSTERING_UPDATING);

expect(actions).toMatchSnapshot();
});
});
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import React, {
useState, useEffect, useCallback,
useState, useEffect,
} from 'react';
import _ from 'lodash';
import { useDispatch, useSelector } from 'react-redux';
Expand All @@ -9,9 +9,8 @@ import {
import PropTypes from 'prop-types';
import { QuestionCircleOutlined } from '@ant-design/icons';

import { updateFilterSettings, saveProcessingSettings } from 'redux/actions/experimentSettings';
import { updateFilterSettings } from 'redux/actions/experimentSettings';

import { runCellSetsClustering } from 'redux/actions/cellSets';
import PreloadContent from '../../PreloadContent';

import SliderWithInput from '../../SliderWithInput';
Expand All @@ -32,7 +31,7 @@ const EMBEDD_METHOD_TEXT = 'Reducing the dimensionality does lose some informati
+ 't-SNE and UMAP are stochastic and very much dependent on choice of parameters (t-SNE even more than UMAP) and can yield very different results in different runs. ';

const CalculationConfig = (props) => {
const { experimentId, onConfigChange, disabled } = props;
const { onConfigChange, disabled } = props;
const FILTER_UUID = 'configureEmbedding';
const dispatch = useDispatch();

Expand All @@ -46,41 +45,19 @@ const CalculationConfig = (props) => {
const { umap: umapSettings, tsne: tsneSettings } = data?.embeddingSettings.methodSettings || {};
const { louvain: louvainSettings } = data?.clusteringSettings.methodSettings || {};

const debouncedClustering = useCallback(
_.debounce((resolution) => {
dispatch(runCellSetsClustering(experimentId, resolution));
}, 1500),
[],
);

const [resolution, setResolution] = useState(null);
const [minDistance, setMinDistance] = useState(null);

useEffect(() => {
if (!resolution && louvainSettings) {
setResolution(louvainSettings.resolution);
}
}, [louvainSettings]);

useEffect(() => {
if (!minDistance && umapSettings) {
setMinDistance(umapSettings.minimumDistance);
}
}, [umapSettings]);

const dispatchDebounce = useCallback(_.debounce((f) => {
dispatch(f);
}, 1500), []);

const updateSettings = (diff) => {
if (diff.embeddingSettings) {
// If this is an embedding change, indicate to user that their changes are not
// applied until they hit Run.
onConfigChange();
} else {
// If it's a clustering change, debounce the save process at 1.5s.
dispatchDebounce(saveProcessingSettings(experimentId, FILTER_UUID));
}
// updates to configure embedding run on worker if they are the only changes
// need to know if change was to embedding or clustering settings
const settingType = Object.keys(diff)[0];
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why only the first changed key from the diff and not the whole Object.keys(diff)?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Each of the diffs is a single object where the top level key is either going to be embeddingSettings or clusteringSettings. This just extracts that key.

onConfigChange(settingType);

dispatch(updateFilterSettings(
FILTER_UUID,
Expand All @@ -98,8 +75,6 @@ const CalculationConfig = (props) => {
},
},
});

onConfigChange();
};
const setDistanceMetric = (value) => {
updateSettings({
Expand All @@ -111,8 +86,6 @@ const CalculationConfig = (props) => {
},
},
});

onConfigChange();
};

const setLearningRate = (value) => {
Expand All @@ -125,8 +98,6 @@ const CalculationConfig = (props) => {
},
},
});

onConfigChange();
};
const setPerplexity = (value) => {
updateSettings({
Expand All @@ -138,8 +109,6 @@ const CalculationConfig = (props) => {
},
},
});

onConfigChange();
};

const renderUMAPSettings = () => (
Expand Down Expand Up @@ -394,21 +363,14 @@ const CalculationConfig = (props) => {
max={10}
step={0.1}
disabled={disabled}
value={resolution}
onUpdate={(value) => {
if (value === resolution) { return; }

setResolution(value);
updateSettings({
clusteringSettings: {
methodSettings: {
louvain: { resolution: value },
},
value={louvainSettings.resolution}
onUpdate={(value) => updateSettings({
clusteringSettings: {
methodSettings: {
louvain: { resolution: value },
},
});

debouncedClustering(value);
}}
},
})}
/>
</Form.Item>
</Form>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -329,7 +329,7 @@ const DataProcessingPage = ({ experimentId, experimentData }) => {
<ConfigureEmbedding
experimentId={expId}
key={key}
onConfigChange={() => onConfigChange(key)}
onConfigChange={(settingType) => onConfigChange(settingType)}
stepHadErrors={getStepHadErrors(key)}
/>
),
Expand Down Expand Up @@ -695,6 +695,10 @@ const DataProcessingPage = ({ experimentId, experimentData }) => {
Your navigation within Cellenics will be restricted during this time.
Do you want to start?
</p>
<Alert
message='Note that you will lose all of your annotated cell sets.'
type='warning'
/>
</Modal>
)
)}
Expand Down
46 changes: 37 additions & 9 deletions src/redux/actions/pipeline/runQC.js
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,9 @@ import {
import { saveProcessingSettings } from 'redux/actions/experimentSettings';
import { loadBackendStatus } from 'redux/actions/backendStatus';
import { loadEmbedding } from 'redux/actions/embedding';
import { runCellSetsClustering } from 'redux/actions/cellSets';

const runOnlyConfigureEmbedding = async (experimentId, embeddingMethod, dispatch) => {
await dispatch(saveProcessingSettings(experimentId, 'configureEmbedding'));

dispatch({
type: EXPERIMENT_SETTINGS_DISCARD_CHANGED_QC_FILTERS,
payload: {},
Expand All @@ -28,6 +27,21 @@ const runOnlyConfigureEmbedding = async (experimentId, embeddingMethod, dispatch
);
};

/**
 * Dispatches the actions needed when only the clustering settings changed.
 *
 * First dispatches EXPERIMENT_SETTINGS_DISCARD_CHANGED_QC_FILTERS with an empty
 * payload, then dispatches the result of runCellSetsClustering for the given
 * experiment and resolution. Neither dispatch is awaited.
 *
 * @param experimentId - forwarded as the first argument to runCellSetsClustering.
 * @param resolution - forwarded as the second argument to runCellSetsClustering.
 * @param {Function} dispatch - called once with each of the two actions, in order.
 * @returns {Promise<undefined>} resolves without a value.
 */
const runOnlyClustering = async (experimentId, resolution, dispatch) => {
  const discardChangedFilters = {
    type: EXPERIMENT_SETTINGS_DISCARD_CHANGED_QC_FILTERS,
    payload: {},
  };
  dispatch(discardChangedFilters);

  // Only the clustering settings were changed, so we run runCellSetsClustering
  const reclusterAction = runCellSetsClustering(experimentId, resolution);
  dispatch(reclusterAction);
};

// Question for review, I thought of implementing this function for all the URLs here
// (extracting all the URLs into one single place and using constants to
// define which url I am trying to access)
Expand All @@ -42,12 +56,27 @@ const runQC = (experimentId) => async (dispatch, getState) => {
const { processing } = getState().experimentSettings;
const { changedQCFilters } = processing.meta;

if (changedQCFilters.size === 1 && changedQCFilters.has('configureEmbedding')) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

With changedQCFilters.size === 1 removed, now that means that if there's more than one changed filter, running it with changed embedding or clustering in the last step will discard them all and run only embedding / clustering?

Copy link
Contributor Author

@alexvpickering alexvpickering Feb 13, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

changedQCFilters.size === 1 is removed because we will now be looking for either exactly embeddingSettings, clusteringSettings, or both. If both are changed but nothing else we also need to run this conditional block (so size is no longer a good way to distinguish).

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sorry, I can't test in staging because I don't have an HMS account, but what I see is that now this will be run even if there are other changedQCFilters from previous steps. There is a return on line 81, so that means QC won't be triggered.
What will happen if I change, for example, the mitochondrial content filter and then go to configure embedding and change clustering / embedding there? I think mitochondrial filter changes will be discarded and clustering / embedding will be run, right?

Copy link
Contributor Author

@alexvpickering alexvpickering Feb 13, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If you send me an email I'll create an account for you to test. Here is why it doesn't run if there are other steps changed:

const otherChanged = [...changedQCFilters].some((value) => value !== 'embeddingSettings' && value !== 'clusteringSettings');

This checks to see if there are other settings changed. If there are not, then the below conditional block runs. See added comments to hopefully clarify:

if (!otherChanged) {

// this runs because exactly embedding or clustering or both has changed (but nothing else)
await dispatch(saveProcessingSettings(experimentId, 'configureEmbedding'));

if (embeddingChanged) {
      runOnlyConfigureEmbedding(
        experimentId,
        processing.configureEmbedding.embeddingSettings.method,
        dispatch,
      );
    }
    if (clusteringChanged) {
      runOnlyClustering(
        experimentId,
        processing.configureEmbedding.clusteringSettings.methodSettings.louvain.resolution,
        dispatch,
      );
    }
    // we return because we are in the !otherChanged block so nothing else changed and we don't re-run QC
    return;
}

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

oh alright sorry

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not at all -- thank you -- always good to have a careful review :)

runOnlyConfigureEmbedding(
experimentId,
processing.configureEmbedding.embeddingSettings.method,
dispatch,
);
const embeddingChanged = changedQCFilters.has('embeddingSettings');
const clusteringChanged = changedQCFilters.has('clusteringSettings');
const otherChanged = [...changedQCFilters].some((value) => value !== 'embeddingSettings' && value !== 'clusteringSettings');

// if only embedding or clustering changed
if (!otherChanged) {
await dispatch(saveProcessingSettings(experimentId, 'configureEmbedding'));
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
const embeddingChanged = changedQCFilters.has('embeddingSettings');
const clusteringChanged = changedQCFilters.has('clusteringSettings');
const otherChanged = [...changedQCFilters].some((value) => value !== 'embeddingSettings' && value !== 'clusteringSettings');
// if only embedding or clustering changed
if (!otherChanged) {
await dispatch(saveProcessingSettings(experimentId, 'configureEmbedding'));
const embeddingChanged = changedQCFilters.has('embeddingSettings');
const clusteringChanged = changedQCFilters.has('clusteringSettings');
if (embeddingChanged || clusteringChanged) {
await dispatch(saveProcessingSettings(experimentId, 'configureEmbedding'));

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also if you move back the await dispatch(saveProcessingSettings(experimentId, 'configureEmbedding')); line to the runOnlyClustering and runOnlyConfigureEmbedding functions, this whole block can be only:

  const embeddingChanged = changedQCFilters.has('embeddingSettings');
  const clusteringChanged = changedQCFilters.has('clusteringSettings');

    if (embeddingChanged) {
      runOnlyConfigureEmbedding(
        experimentId,
        processing.configureEmbedding.embeddingSettings.method,
        dispatch,
      );
    }
    if (clusteringChanged) {
      runOnlyClustering(
        experimentId,
        processing.configureEmbedding.clusteringSettings.methodSettings.louvain.resolution,
        dispatch,
      );
    }

Copy link
Contributor Author

@alexvpickering alexvpickering Feb 13, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So now that means that if there's more than one changed filter, running it with changed embedding or clustering in the last step will discard them all and run only embedding / clustering?

The problem with this is that it will run the conditional block if there are other changes to Data Processing in addition to an embedding or clustering change. I want the condition to run if there is only an embedding or clustering change (or both but nothing else). Embedding and clustering will be run after the pipeline finishes if there are any other changes so I don't want to call embedding/clustering in those cases.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also if you move back the await dispatch(saveProcessingSettings(experimentId, 'configureEmbedding')); line to the runOnlyClustering and runOnlyConfigureEmbedding functions, this whole block can be only:

  const embeddingChanged = changedQCFilters.has('embeddingSettings');
  const clusteringChanged = changedQCFilters.has('clusteringSettings');

    if (embeddingChanged) {
      runOnlyConfigureEmbedding(
        experimentId,
        processing.configureEmbedding.embeddingSettings.method,
        dispatch,
      );
    }
    if (clusteringChanged) {
      runOnlyClustering(
        experimentId,
        processing.configureEmbedding.clusteringSettings.methodSettings.louvain.resolution,
        dispatch,
      );
    }

happy to move the line back if you have a preference. I moved it out so that it wasn't duplicated between the two functions

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I was only thinking that, in case we want to use the runOnlyClustering and runOnlyConfigureEmbedding functions somewhere else in the code, someone might expect them to do a save and forget to dispatch the save before running them.

if (embeddingChanged) {
runOnlyConfigureEmbedding(
experimentId,
processing.configureEmbedding.embeddingSettings.method,
dispatch,
);
}
if (clusteringChanged) {
runOnlyClustering(
experimentId,
processing.configureEmbedding.clusteringSettings.methodSettings.louvain.resolution,
dispatch,
);
}

return;
}
Expand All @@ -57,7 +86,6 @@ const runQC = (experimentId) => async (dispatch, getState) => {
const stepConfig = processing[stepKey];
processingConfigDiff[stepKey] = stepConfig;
});

try {
// We are only sending the configuration that we know changed
// with respect to the one that is already persisted in dynamodb
Expand Down
Loading