comparative-genomics-project/workflow/scripts/remove_supercontigs.awk

26 lines
462 B
Awk
Raw Normal View History

2024-10-28 10:46:05 +01:00
#!/usr/bin/env -S awk -f
# Filter out 'supercontigs' records
# from an Ensembl proteome fasta file
# Usage:
# awk -f remove_supercontigs.awk \
# "proteome.fasta" \
# > "proteome_nosupercontigs.fasta"
# State: `keep` is true while the record currently being read should be
# written to the output. Nothing is emitted before the first header line.
BEGIN {
    keep = 0
}

# Header line ('>' in column one): the third whitespace-separated field is
# taken as the locus, and the text before its first ':' is the region type.
# A record is kept unless that type is exactly "supercontig".
/^>/ {
    split($3, locus_parts, ":")
    keep = (locus_parts[1] != "supercontig")
    if (keep)
        print
    # Header fully handled; the sequence rule below never applies to it.
    next
}

# Sequence line (non-empty, not starting with '>'): emitted only while the
# enclosing record is being kept. Pattern-only rule => default action print.
keep && /^[^>]/