Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Move read filtering to after merging in CRISPResso #397

Merged
merged 4 commits into from
Mar 28, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion .github/workflows/integration_tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@ name: Run Integration Tests

on:
push:

pull_request:
types: [opened, reopened]

jobs:
build:
Expand Down Expand Up @@ -37,7 +40,7 @@ jobs:
run: |
mkdir ../CRISPResso2_copy
cp -r * ../CRISPResso2_copy

- name: Copy C2_tests repo
uses: actions/checkout@master
with:
Expand Down
3 changes: 3 additions & 0 deletions .github/workflows/pylint.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@ name: Pylint

on:
push:

pull_request:
types: [opened, reopened]

jobs:
build:
Expand Down
3 changes: 3 additions & 0 deletions .github/workflows/pytest.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@ name: Run Pytest

on:
push:

pull_request:
types: [opened, reopened]

jobs:
build:
Expand Down
64 changes: 26 additions & 38 deletions CRISPResso2/CRISPRessoCORE.py
Original file line number Diff line number Diff line change
Expand Up @@ -2117,44 +2117,6 @@ def get_prime_editing_guides(this_amp_seq, this_amp_name, ref0_seq, prime_edited

info('Done!', {'percent_complete': 4})

if args.min_average_read_quality>0 or args.min_single_bp_quality>0 or args.min_bp_quality_or_N>0:
if args.bam_input != '':
raise CRISPRessoShared.BadParameterException('The read filtering options are not available with bam input')
info('Filtering reads with average bp quality < %d and single bp quality < %d and replacing bases with quality < %d with N ...' % (args.min_average_read_quality, args.min_single_bp_quality, args.min_bp_quality_or_N))
min_av_quality = None
if args.min_average_read_quality > 0:
min_av_quality = args.min_average_read_quality

min_single_bp_quality = None
if args.min_single_bp_quality > 0:
min_single_bp_quality = args.min_single_bp_quality

min_bp_quality_or_N = None
if args.min_bp_quality_or_N > 0:
min_bp_quality_or_N = args.min_bp_quality_or_N

if args.fastq_r2!='':
output_filename_r1=_jp(os.path.basename(args.fastq_r1.replace('.fastq', '')).replace('.gz', '')+'_filtered.fastq.gz')
output_filename_r2=_jp(os.path.basename(args.fastq_r2.replace('.fastq', '')).replace('.gz', '')+'_filtered.fastq.gz')

from CRISPResso2 import filterFastqs
filterFastqs.filterFastqs(fastq_r1=args.fastq_r1, fastq_r2=args.fastq_r2, fastq_r1_out=output_filename_r1, fastq_r2_out=output_filename_r2, min_bp_qual_in_read=min_single_bp_quality, min_av_read_qual=min_av_quality, min_bp_qual_or_N=min_bp_quality_or_N)

args.fastq_r1 = output_filename_r1
args.fastq_r2 = output_filename_r2
files_to_remove += [output_filename_r1]
files_to_remove += [output_filename_r2]

else:
output_filename_r1=_jp(os.path.basename(args.fastq_r1.replace('.fastq', '')).replace('.gz', '')+'_filtered.fastq.gz')

from CRISPResso2 import filterFastqs
filterFastqs.filterFastqs(fastq_r1=args.fastq_r1, fastq_r1_out=output_filename_r1, min_bp_qual_in_read=min_single_bp_quality, min_av_read_qual=min_av_quality, min_bp_qual_or_N=min_bp_quality_or_N)

args.fastq_r1 = output_filename_r1
files_to_remove += [output_filename_r1]


#Trim and merge reads
if args.bam_input != '' and args.trim_sequences:
raise CRISPRessoShared.BadParameterException('Read trimming options are not available with bam input')
Expand Down Expand Up @@ -2292,7 +2254,33 @@ def get_prime_editing_guides(this_amp_seq, this_amp_name, ref0_seq, prime_edited
info('Wrote force-merged reads to ' + new_merged_filename)

info('Done!', {'percent_complete': 7})
else: # single end reads with no trimming
processed_output_filename = args.fastq_r1

if args.min_average_read_quality > 0 or args.min_single_bp_quality > 0 or args.min_bp_quality_or_N > 0:
if args.bam_input != '':
raise CRISPRessoShared.BadParameterException('The read filtering options are not available with bam input')
info('Filtering reads with average bp quality < %d and single bp quality < %d and replacing bases with quality < %d with N ...' % (args.min_average_read_quality, args.min_single_bp_quality, args.min_bp_quality_or_N))
min_av_quality = None
if args.min_average_read_quality > 0:
min_av_quality = args.min_average_read_quality

min_single_bp_quality = None
if args.min_single_bp_quality > 0:
min_single_bp_quality = args.min_single_bp_quality

min_bp_quality_or_N = None
if args.min_bp_quality_or_N > 0:
min_bp_quality_or_N = args.min_bp_quality_or_N

output_filename_r1 = _jp(os.path.basename(
processed_output_filename.replace('.fastq', '')).replace('.gz', '') + '_filtered.fastq.gz',
)

from CRISPResso2 import filterFastqs
filterFastqs.filterFastqs(fastq_r1=processed_output_filename, fastq_r1_out=output_filename_r1, min_bp_qual_in_read=min_single_bp_quality, min_av_read_qual=min_av_quality, min_bp_qual_or_N=min_bp_quality_or_N)

processed_output_filename = output_filename_r1

#count reads
N_READS_AFTER_PREPROCESSING = 0
Expand Down
24 changes: 21 additions & 3 deletions CRISPResso2/CRISPRessoReports/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -86,10 +86,16 @@ Also, note that the default commit message may have a summary of all commits, pl

1. In the parent repo, switch to (or create) the branch on `CRISPRessoReports` that will have the changes you push.

If you are creating a new branch based off of `CRISPRessoReports` master, run this:
If you are creating a new branch based off of `CRISPRessoReports` master, run this to switch to the reports master branch:

``` shell
git checkout -b <feature-branch>-reports reports/master
git checkout reports/master
```

Then, run this to actually create (and switch to) the branch that you will be working with:

``` shell
git checkout -b <feature-branch>-reports
```

Or if you would like to push to an existing branch on `CRISPRessoReports`, run this:
Expand All @@ -106,14 +112,26 @@ git merge --squash -Xsubtree="CRISPResso2/CRISPRessoReports" --no-commit --allow

*Note:* `<feature-branch>` is the branch of the parent repo that contains the changes inside the `CRISPRessoReports` sub-directory.

3. Push to `CRISPRessoReports`.

``` shell
git push
```

4. Switch back to your branch on `CRISPResso` or `C2Web`.

``` shell
git checkout <feature-branch>
```

### I am working on a feature that requires changing `CRISPRessoReports`, what do I do?

If a feature that you are working on requires changes to CRISPRessoReports, you will need to perform a few steps to get set up.

1. Create a feature branch in the parent repo, based on the parent repo master.

``` shell
git checkout -b <feature-branch> origin/master
git checkout -b <feature-branch>
```

2. Create a feature branch on `CRISPRessoReports`.
Expand Down
1 change: 0 additions & 1 deletion CRISPResso2/CRISPRessoReports/templates/batchReport.html
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,6 @@
{% endblock %}

{% block content %}

<div class="row">
<div class="col-sm-1"></div>
<div class="col-sm-10">
Expand Down
5 changes: 3 additions & 2 deletions CRISPResso2/CRISPRessoReports/templates/multiReport.html
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@
{% endblock %}

{% block content %}

<div class="row">
<div class="col-sm-1"></div>
<div class="col-sm-10">

Expand All @@ -61,7 +61,7 @@ <h5>{{report_name}}</h5>
<div class='card-body p-0'>
<div class="list-group list-group-flush" style='max-height:80vh;overflow-y:auto'>
{% for run_name in run_names %}
<a href="{{report_data['crispresso_data_path']}}{{sub_html_files[run_name]}}" class="list-group-item list-group-item-action">{{run_name}}</a>
<a href="{{sub_html_files[run_name]}}" class="list-group-item list-group-item-action">{{run_name}}</a>
{% endfor %}
</div>
</div>
Expand Down Expand Up @@ -162,6 +162,7 @@ <h5>Summary Plots</h5>
</div> {# column #} <!-- end column -->

<div class="col-sm-1"></div>
</div>
{% endblock %}

{% block foot %}
Expand Down
5 changes: 2 additions & 3 deletions CRISPResso2/CRISPRessoReports/templates/pooledReport.html
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,6 @@
{% endblock %}

{% block content %}

<div class="row">
<div class="col-sm-1"></div>
<div class="col-sm-10">
Expand All @@ -60,8 +59,8 @@ <h5>{{report_name}}</h5>
<div class='card-body p-0'>
<div class="list-group list-group-flush">
{% for region_name in run_names %}
<a href="{{sub_html_files[region_name]}}" class="list-group-item list-group-item-action">{{region_name}}</a>
{% endfor %}
<a href="{{sub_html_files[region_name]}}" class="list-group-item list-group-item-action">{{region_name}}</a>
{% endfor %}
</div>
</div>
</div>
Expand Down
5 changes: 2 additions & 3 deletions CRISPResso2/CRISPRessoReports/templates/wgsReport.html
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,6 @@
{% endblock %}

{% block content %}

<div class="row">
<div class="col-sm-1"></div>
<div class="col-sm-10">
Expand All @@ -60,8 +59,8 @@ <h5>{{report_name}}</h5>
<div class="card-body p-0">
<div class="list-group list-group-flush">
{% for region_name in run_names %}
<a href="{{sub_html_files[region_name]}}" class="list-group-item list-group-item-action" id="{{region_name}}">{{region_name}}</a>
{% endfor %}
<a href="{{sub_html_files[region_name]}}" class="list-group-item list-group-item-action" id="{{region_name}}">{{region_name}}</a>
{% endfor %}
</div>
</div>
</div>
Expand Down