Source code for gpsea.view._txp

import typing
from collections import defaultdict
from matplotlib import pyplot as plt
from matplotlib.patches import Rectangle
from matplotlib.lines import Line2D
from gpsea.model import Variant, TranscriptCoordinates, ProteinMetadata


def get_interpolated_location_in_protein(aa_pos_in_prot:int,
                                        prot_len:int,
                                        plot_width:int=1200,
                                        margin_size:int=20) -> typing.Tuple[int,int]:
    used_plot_width = plot_width - 2*margin_size
    rel_pos_in_prot = aa_pos_in_prot / prot_len
    interpolated_x = margin_size + used_plot_width * rel_pos_in_prot
    return interpolated_x



[docs] class VariantTranscriptVisualizer: """ `VariantTranscriptProteinArtist` creates a graphic to show distributions of variants across a provided transcript and its protein product. """ # noinspection PyMethodMayBeStatic
[docs] def draw_variants(self, variants: typing.Iterable[Variant], tx: TranscriptCoordinates, protein: ProteinMetadata): title = f"{protein.protein_id} ({protein.label})" _, ax = plt.subplots(1, figsize=(10, 10)) protein_domains = set() _THRESHOLD = 2 BOTTOM_MARGIN = 20 amino_acid_len = tx.get_codon_count() # draw a box that is ten aax tall, where aax is the dimension of one amino acid prot_start = get_interpolated_location_in_protein(1, amino_acid_len) prot_end = get_interpolated_location_in_protein(amino_acid_len, amino_acid_len) _box_height = 10/amino_acid_len prot_width = prot_end - prot_start + 1 protein_height = prot_width/20 #rect = Rectangle((prot_start, BOTTOM_MARGIN), prot_width, protein_height) rect = Rectangle((50, 30), prot_width, 40, edgecolor='black', facecolor='white', alpha=0.5) ax.add_patch(rect) for variant in variants: tx_annot_list = [txa for txa in variant.tx_annotations if txa.transcript_id == tx.identifier] if len(tx_annot_list) == 0: raise ValueError(f"Could not find variant transcript annotations for {tx.identifier}") if len(tx_annot_list) > 1: # Should never happen raise ValueError(f"Found mutiple variant transcript annotations for {tx.identifier}") tx_annot = tx_annot_list[0] hgvs = tx_annot.hgvsc_id.split(":") if len(hgvs) > 1: hgvs_cdna = hgvs[1] else: hgvs_cdna = hgvs variant_effects = tx_annot.variant_effects if len(variant_effects) > 1: _var_effect = "MULTIPLE" elif len(variant_effects) == 0: _var_effect = "UNKNOWN" else: _var_effect = variant_effects[0].name for p in tx_annot.protein_affected: for f in p.domains(): protein_domains.add(f.info) if tx_annot.protein_effect_location is None: print(f"skipping {hgvs_cdna} because no protein effect location was found") continue # for plotting, we will just show the start position; note there is also end pe_loc = tx_annot.protein_effect_location.start prot_variant_location = get_interpolated_location_in_protein(pe_loc, amino_acid_len) x = [prot_variant_location, prot_variant_location] ybase = BOTTOM_MARGIN + protein_height ytop = ybase + 10 y = [ybase, ytop] line = Line2D(x, y) ax.add_line(line) # TODO show one box per mutation or otherwise represent frequency color_list = ['red', 'blue', 'green', 'orange', 'brown', 'yellow', 'purple'] feature_to_color_index_d = defaultdict(int) # default is red for feature in protein_domains: name = feature.name if name not in feature_to_color_index_d: new_idx = len(feature_to_color_index_d) % len(color_list) feature_to_color_index_d[name] = new_idx color_idx = feature_to_color_index_d.get(name) box_color = color_list[color_idx] start = feature.start end = feature.end #print(name, start, end, box_color) _box_height = 10/amino_acid_len prot_width = prot_end - prot_start + 1 protein_height = prot_width/20 #rect = Rectangle((prot_start, BOTTOM_MARGIN), prot_width, protein_height) box_start = get_interpolated_location_in_protein(start, amino_acid_len) box_end = get_interpolated_location_in_protein(end, amino_acid_len) box_width = box_end - box_start rect = Rectangle((box_start, 30), box_width, 40, edgecolor='black', facecolor=box_color, alpha=0.5) ax.add_patch(rect) ax.set_title(title, loc='left', fontsize='x-large') ax.set_xlabel('X Label', labelpad=12, fontsize=14.5) ax.set_ylabel('Y label', labelpad=12, fontsize=14.5) ax.set_xlim(-10, 1200) ax.set_ylim(0,120)