Skip to content

Commit a19a8a3

Browse files
committed
Output low quality bins
1 parent 77f1dd7 commit a19a8a3

File tree

1 file changed

+23
-0
lines changed

1 file changed

+23
-0
lines changed

src/metacoag_main.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -821,10 +821,13 @@ def thread_function(n, contig, coverages, normalized_tetramer_profiles, bin_seed
821821

822822
# Get output path
823823
output_bins_path = output_path + prefix + "bins/"
824+
lq_output_bins_path = output_path + prefix + "low_quality_bins/"
824825

825826
# Create output directory for bin files
826827
if not os.path.isdir(output_bins_path):
827828
subprocess.run("mkdir -p " + output_bins_path, shell=True)
829+
if not os.path.isdir(lq_output_bins_path):
830+
subprocess.run("mkdir -p " + lq_output_bins_path, shell=True)
828831

829832
for bin_clique in bin_cliques:
830833

@@ -835,6 +838,7 @@ def thread_function(n, contig, coverages, normalized_tetramer_profiles, bin_seed
835838
if len(bin_clique) == 1 and bin_clique[0] in bins_to_rem:
836839
can_write = False
837840

841+
# Write output bins
838842
if can_write:
839843

840844
for b in bin_clique:
@@ -853,6 +857,25 @@ def thread_function(n, contig, coverages, normalized_tetramer_profiles, bin_seed
853857
subprocess.run("awk -F'>' 'NR==FNR{ids[$0]; next} NF>1{f=($2 in ids)} f' " + output_bins_path + prefix + "bin_" + bin_name +
854858
"_ids.txt " + contigs_file + " > " + output_bins_path + prefix + "bin_" + bin_name + "_seqs.fasta", shell=True)
855859

860+
# Write low-quality bins (cliques for which can_write is False) to the separate low_quality_bins/ directory
861+
else:
862+
863+
for b in bin_clique:
864+
865+
# Write contig identifiers of each bin to files
866+
with open(lq_output_bins_path + prefix + "bin_" + bin_name + "_ids.txt", "w") as bin_file:
867+
for contig in bins[b]:
868+
869+
if assembler == "megahit":
870+
bin_file.write(
871+
contig_descriptions[graph_to_contig_map[contig_names[contig]]] + "\n")
872+
else:
873+
bin_file.write(contig_names[contig] + "\n")
874+
875+
# Write contigs of each bin to files
876+
subprocess.run("awk -F'>' 'NR==FNR{ids[$0]; next} NF>1{f=($2 in ids)} f' " + lq_output_bins_path + prefix + "bin_" + bin_name +
877+
"_ids.txt " + contigs_file + " > " + lq_output_bins_path + prefix + "bin_" + bin_name + "_seqs.fasta", shell=True)
878+
856879
logger.info("Final binning results can be found in " + str(output_bins_path))
857880

858881

0 commit comments

Comments
 (0)