#!/usr/bin/env -S awk -f
# Filter out 'supercontigs' records
# from an Ensembl proteome FASTA file.
#
# Usage:
#   awk -f remove_supercontigs.awk \
#       "proteome.fasta" \
#       > "proteome_nosupercontigs.fasta"

# Before any input is read, mark output as disabled: sequence lines are
# only printed once a header rule has set on_good_sequence to 1.
BEGIN { on_good_sequence = 0 }
# Header line (starts with ">"): decide whether this record is kept.
# The third whitespace-separated field is split on ":"; the record is
# dropped when the first piece is exactly "supercontig". Kept headers
# are printed immediately, and on_good_sequence tells the sequence-line
# rule whether to print the lines that follow.
/^>/ {
    split($3, locus_parts, ":")
    on_good_sequence = (locus_parts[1] != "supercontig")
    if (on_good_sequence)
        print
}
# Non-empty line that does not start with ">": print it only while the
# most recent header was kept (on_good_sequence is non-zero).
on_good_sequence && /^[^>]/ {
    print
}