comparative-genomics-project/workflow/scripts/protein_lengths.awk

28 lines
448 B
Awk

#!/usr/bin/env -S awk -f
# Associate an isoform id with the length of its sequence.
#
# Input : FASTA-formatted text (header lines start with ">", every other
#         non-empty line is sequence data belonging to the latest header).
# Output: one line per FASTA record, "<isoform_id> <sequence_length>",
#         separated by OFS. The id is the first whitespace-delimited field
#         of the header line with the leading ">" removed; the length is the
#         total number of characters on the record's sequence lines.

BEGIN {
    sequence_length = 0
    isoform_id = ""
}

# Drop a trailing carriage return so CRLF files neither inflate the length
# by one per line nor leave "\r" glued to the id.
{
    sub(/\r$/, "")
}

# Header line: flush the record collected so far (if any), then ALWAYS start
# a new record. Starting the new record must not be conditional on this being
# the first header, otherwise every later record is attributed to the first id.
/^>/ {
    if (isoform_id != "") {
        print isoform_id, sequence_length
    }
    isoform_id = $1
    # Strip only the leading FASTA marker; a ">" inside the id is kept.
    sub(/^>/, "", isoform_id)
    sequence_length = 0
}

# Sequence line: accumulate its length into the current record.
/^[^>]/ {
    sequence_length += length($0)
}

# End of input: flush the last record, which has no following header.
END {
    if (isoform_id != "") {
        print isoform_id, sequence_length
    }
}