comparative-genomics-project/workflow/scripts/remove_supercontigs.awk

26 lines
462 B
Awk

#!/usr/bin/env -S awk -f
# Filter out 'supercontigs' records
# from an Ensembl proteome fasta file
# Usage:
# awk -f remove_supercontigs.awk \
# "proteome.fasta" \
# > "proteome_nosupercontigs.fasta"
# State: keep_record is 1 while the current FASTA record should be emitted,
# 0 otherwise. Nothing is emitted before the first accepted header.
BEGIN { keep_record = 0 }

# Header line: the third whitespace-separated field is split on ":" and its
# first component is compared against "supercontig". The record is kept
# unless that component is exactly "supercontig".
/^>/ {
    split($3, locus_parts, ":")
    keep_record = (locus_parts[1] != "supercontig")
    if (keep_record)
        print
}

# Non-empty, non-header line: echoed only while inside a kept record
# (pattern-only rule, so the default action prints the whole line).
keep_record && /^[^>]/