#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Title     : drawRNA.py
project   : web
Created by: julse
Created on: 2025/7/4 14:24
des       : Gradio front end that folds an mRNA (5'UTR + CDS + 3'UTR) and
            renders its secondary structure with per-region colouring.
"""
import os
import subprocess
import sys
import tempfile
import time

import numpy as np
import pandas as pd

# The web-UI / imaging stack is only needed by the UI entry point. Guarding
# the imports keeps the pure index and pairing helpers importable (e.g. for
# unit tests) on machines that do not have these packages installed.
try:
    import gradio as gr
except ImportError:  # reported with a clear message in draw_rna_2d()
    gr = None

try:
    from PIL import Image
except ImportError:
    Image = None

# Fixed palette, listed in the order of the regions named in the comments.
COLORS = [
    '#FF0000',  # red    - UTR5
    '#0000FF',  # blue   - CDS start region
    '#FFC0CB',  # pink   - CDS end region
    '#FFA500',  # orange - UTR3
    '#FFFF00',  # yellow - start codon
    '#800080',  # purple - stop codon
]

COLOR_MAP = {
    'UTR5': '#FF0000',         # red
    'CDS_start': '#0000FF',    # blue        - CDS start region
    'CDS_mid': '#00FF00',      # green       - CDS middle region (added)
    'CDS_end': '#FFC0CB',      # pink        - CDS end region
    'UTR3': '#FFA500',         # orange
    'start_codon': '#FFFF00',  # yellow      - start codon
    'stop_codon': '#800080',   # purple      - stop codon
    'intron': '#A9A9A9',       # grey        - intron (added)
    'exon': '#90EE90',         # light green - exon (added)
}

# Colour used for bases that belong to none of the highlighted regions.
_DEFAULT_COLOR = '#808080'

# Regions drawn by run_draw_rna_advanced, in colormap order: the region at
# position k is encoded as the integer k + 1 (0 is reserved for "default").
_REGION_ORDER = ('UTR5', 'CDS_start', 'CDS_end', 'UTR3', 'start_codon', 'stop_codon')

# Legend captions for the regions above.
_REGION_LABELS = {
    'UTR5': "5'UTR",
    'CDS_start': "CDS Start",
    'CDS_end': "CDS End",
    'UTR3': "3'UTR",
    'start_codon': "Start Codon",
    'stop_codon': "Stop Codon",
}

# Markdown table shown in the "colour legend" accordion of the UI.
_LEGEND_MARKDOWN = "\n".join([
    "| 颜色 | 区域 |",
    "|------|------|",
    "| 红色 | 5'UTR 区域 |",
    "| 蓝色 | CDS起始区域 |",
    "| 粉色 | CDS终止区域 |",
    "| 橙色 | 3'UTR 区域 |",
    "| 黄色 | 起始密码子 (AUG) |",
    "| 紫色 | 终止密码子 (UAA, UAG, UGA) |",
])


def get_bases_index(utr5, cds, utr3, window=300):
    """Compute the 1-based base positions of each highlighted mRNA region.

    Positions refer to the concatenated sequence ``utr5 + cds + utr3``.

    Args:
        utr5: 5'UTR sequence.
        cds: coding sequence (expected to start with the start codon and end
            with the stop codon).
        utr3: 3'UTR sequence.
        window: maximum number of bases taken on each side of the start and
            stop codon boundaries (defaults to the original fixed 300).

    Returns:
        A 6-tuple of comma-separated index strings, in this order:
        5'UTR window, CDS start window (start codon excluded), CDS end window
        (stop codon excluded), 3'UTR window, start codon, stop codon.
        An empty region is returned as ``""``.
    """
    start_idx = len(utr5)            # 0-based offset of the start codon
    stop_idx = start_idx + len(cds)  # 0-based offset just past the stop codon

    regions = (
        # last `window` bases of the 5'UTR
        range(max(0, start_idx - window) + 1, start_idx + 1),
        # CDS bases after the start codon, capped by the window and the stop codon
        range(start_idx + 3 + 1, min(start_idx + window, stop_idx - 3) + 1),
        # CDS bases before the stop codon, capped by the window and the CDS start
        range(max(start_idx, stop_idx - window) + 1, stop_idx - 2),
        # first `window` bases of the 3'UTR
        range(stop_idx + 1, min(stop_idx + window + 1, stop_idx + len(utr3) + 1)),
        # start codon (3 bases)
        range(start_idx + 1, start_idx + 4),
        # stop codon (3 bases)
        range(stop_idx - 2, stop_idx + 1),
    )
    return tuple(",".join(map(str, region)) for region in regions)


def calc_mfe(seq):
    """Fold ``seq`` with ViennaRNA and return ``(dot_bracket, mfe)``.

    The ``RNA`` module is imported lazily so the rest of the module can be
    used without ViennaRNA installed.
    """
    import RNA

    fc = RNA.fold_compound(seq)
    ss, mfe = fc.mfe()
    return ss, mfe


def dbn_to_tuple(dbn, c1_region=(), c2_region=()):
    """Extract base pairs from a dot-bracket string.

    Args:
        dbn: dot-bracket structure; only ``(`` and ``)`` are interpreted.
        c1_region: 1-based positions allowed for the opening base.
        c2_region: 1-based positions allowed for the closing base.
            When either region is empty, every pair is reported.

    Returns:
        Dict mapping the 1-based closing position to its 1-based opening
        position. Unclosed ``(`` are ignored.

    Raises:
        ValueError: if a ``)`` has no matching ``(``.
    """
    unfiltered = len(c1_region) == 0 or len(c2_region) == 0
    # Sets make the per-pair membership tests O(1) instead of O(region size).
    opening_ok, closing_ok = set(c1_region), set(c2_region)

    open_stack, pairs = [], {}
    for pos, symbol in enumerate(dbn, start=1):
        if symbol == '(':
            open_stack.append(pos)
        elif symbol == ')':
            if not open_stack:
                raise ValueError(f"unbalanced structure: unmatched ')' at position {pos}")
            partner = open_stack.pop()
            if unfiltered or (pos in closing_ok and partner in opening_ok):
                pairs[pos] = partner
    return pairs


def run_cmd(command, output_file):
    """Run an external drawing command and verify that it produced a file.

    Args:
        command: argument list handed to ``subprocess.run``.
        output_file: path the command is expected to create.

    Raises:
        RuntimeError: the command exited with a non-zero status; the
            (possibly partial) output file is removed first.
        FileNotFoundError: the command succeeded but wrote no output file.
    """
    result = subprocess.run(command, capture_output=True, text=True)

    if result.returncode != 0:
        error_msg = f"执行VARNA命令时出错:\n{result.stderr}"
        # The file may never have been created; a failing cleanup must not
        # hide the real error reported by the command.
        try:
            os.unlink(output_file)
        except FileNotFoundError:
            pass
        raise RuntimeError(error_msg)

    if not os.path.exists(output_file):
        raise FileNotFoundError("未能生成结构图文件")


def _parse_indices(spec):
    """Turn a comma-separated index string (or an iterable of ints) into a list of ints."""
    if isinstance(spec, str):
        return [int(token) for token in spec.split(",") if token.strip()]
    return [int(item) for item in spec]


def _region_values(seq_len, layers):
    """Build the per-base colour codes; later layers overwrite earlier ones."""
    values = [0] * seq_len
    for region, positions in layers:
        code = _REGION_ORDER.index(region) + 1
        for pos in positions:
            # Skip positions outside the sequence (degenerate inputs such as a
            # CDS shorter than a codon) instead of wrapping around or raising.
            if 1 <= pos <= seq_len:
                values[pos - 1] = code
    return values


def _focus_region(head, tail):
    """Format the 5' and 3' highlight spans; empty sides are left out."""
    return ';'.join(
        f'{min(group)}-{max(group)}:fill=#bcffdd' for group in (head, tail) if group
    )


def run_draw_rna_advanced(full_sequence, structure, utr5_range, start_codon_range,
                          stop_codon_range, utr3_range, start_codon, stop_codon,
                          focus_region, auxBPs, output_file, algorithm, title=''):
    """Render a region-coloured structure plot with draw_rna and save it.

    Args:
        full_sequence: concatenated mRNA sequence.
        structure: dot-bracket structure of ``full_sequence``.
        utr5_range, start_codon_range, stop_codon_range, utr3_range,
        start_codon, stop_codon: 1-based positions of each region, either as
            the comma-separated strings produced by ``get_bases_index`` or as
            iterables of ints.
        focus_region, auxBPs, title: accepted for signature compatibility
            with the former VARNA renderer; not used by this renderer.
        output_file: image path written with ``plt.savefig``.
        algorithm: ``"line"`` (or ``True``) for the line layout; any other
            value selects the default layout.
    """
    import matplotlib.pyplot as plt
    from matplotlib.colors import ListedColormap
    from draw_rna.ipynb_draw import draw_struct

    # Paint order matters: codons are applied last so they stay visible on
    # top of the wider UTR / CDS windows that may overlap them.
    layers = (
        ('UTR5', _parse_indices(utr5_range)),
        ('UTR3', _parse_indices(utr3_range)),
        ('CDS_start', _parse_indices(start_codon_range)),
        ('CDS_end', _parse_indices(stop_codon_range)),
        ('start_codon', _parse_indices(start_codon)),
        ('stop_codon', _parse_indices(stop_codon)),
    )
    values = _region_values(len(full_sequence), layers)

    # Colour k of the colormap corresponds to region code k (0 = default grey).
    palette = [_DEFAULT_COLOR] + [COLOR_MAP[name] for name in _REGION_ORDER]

    # NOTE(review): `line` is treated as a boolean flag, as in the commented-out
    # `algorithm == "line"` of the previous version; passing the raw string made
    # both "line" and "naview" truthy. Confirm against the installed draw_rna.
    use_line = algorithm if isinstance(algorithm, bool) else algorithm == "line"

    try:
        draw_struct(
            full_sequence,
            structure,
            c=values,
            cmap=ListedColormap(palette),
            vmin=0,
            vmax=len(palette) - 1,
            line=use_line,
        )

        legend_elements = [
            plt.Rectangle((0, 0), 1, 1, facecolor=COLOR_MAP[name], edgecolor='black',
                          label=_REGION_LABELS[name])
            for name in _REGION_ORDER
        ]
        plt.legend(handles=legend_elements, loc='upper left',
                   bbox_to_anchor=(1.05, 1), fontsize=10)

        plt.savefig(output_file, dpi=300, bbox_inches='tight')
    finally:
        # Always release the figure, even when drawing or saving fails.
        plt.close()
    print(f"Successfully created: {output_file}")


def draw_simple(utr5_seq, title=''):
    """Fold one sequence, draw it with default colours and save it as a PNG.

    Args:
        utr5_seq: sequence to fold and draw (any region, despite the name).
        title: title placed on the figure.

    Returns:
        ``(png_path, mfe, dot_bracket)``; the PNG is a temporary file that is
        not deleted automatically.
    """
    import matplotlib.pyplot as plt
    from draw_rna.ipynb_draw import draw_struct

    stru5, mfe = calc_mfe(utr5_seq)
    try:
        draw_struct(utr5_seq, stru5)
        with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmpfile:
            output_file = tmpfile.name
        plt.title(title)
        # Save the currently active figure.
        plt.savefig(output_file, dpi=300, bbox_inches='tight',
                    facecolor='white', edgecolor='none')
    finally:
        # Close the figure so repeated requests do not accumulate open figures.
        plt.close()
    return output_file, mfe, stru5


def generate_rna_structure(utr5_seq, cds_seq, utr3_seq, structure, draw_2d=("mRNA",)):
    """Generate the requested RNA structure images.

    Args:
        utr5_seq, cds_seq, utr3_seq: the three mRNA regions.
        structure: dot-bracket structure of the full mRNA; when empty it is
            predicted with ``calc_mfe``.
        draw_2d: panels to draw. Recognised names are ``"Full mRNA"`` (alias
            ``"mRNA"``), ``"5'leader (30 nt)"``, ``"5'UTR"``, ``"CDS"`` and
            ``"3'UTR"``.

    Returns:
        ``(img_paths, mfe, structure, message)`` where ``img_paths`` is a list
        of ``(png_path, caption)`` tuples, ``mfe`` is the full-mRNA MFE (or
        ``None`` when it was not computed) and ``message`` collects per-panel
        MFE lines or a length-mismatch error.
    """
    message = ""
    full_sequence = utr5_seq + cds_seq + utr3_seq
    mfe = None
    img_paths = []

    if draw_2d is None:
        wanted = set()
    elif isinstance(draw_2d, str):
        wanted = {draw_2d}
    else:
        wanted = set(draw_2d)

    # "mRNA" is accepted as an alias: it is this function's default value and
    # previously matched no panel at all, so the default call drew nothing.
    if "Full mRNA" in wanted or "mRNA" in wanted:
        if not structure:
            structure, mfe = calc_mfe(full_sequence)

        # The sequence and the structure must describe the same bases.
        if len(full_sequence) != len(structure):
            # Keep the 4-tuple shape so callers that unpack the result can
            # show the problem instead of failing on a bare string.
            message = f"序列长度({len(full_sequence)})与结构长度({len(structure)})不匹配"
            return img_paths, mfe, structure, message

        (utr5_range, start_codon_range, stop_codon_range,
         utr3_range, start_codon, stop_codon) = get_bases_index(utr5_seq, cds_seq, utr3_seq)

        utr5_idx = _parse_indices(utr5_range)
        cds_head_idx = _parse_indices(start_codon_range)
        cds_tail_idx = _parse_indices(stop_codon_range)
        utr3_idx = _parse_indices(utr3_range)
        start_idx = _parse_indices(start_codon)
        stop_idx = _parse_indices(stop_codon)

        focus_region = _focus_region(utr5_idx + cds_head_idx, cds_tail_idx + utr3_idx)
        # Pairs that connect the 5' side (opening base) to the 3' side (closing base).
        pairs = dbn_to_tuple(
            structure,
            c1_region=utr5_idx + start_idx + cds_head_idx,
            c2_region=cds_tail_idx + utr3_idx + stop_idx,
        )
        auxBPs = ';'.join(f'({key},{value}):color=#6ed86e' for key, value in pairs.items())

        for algorithm in ["line", "naview"]:
            with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmpfile:
                output_file = tmpfile.name
            run_draw_rna_advanced(full_sequence, structure, utr5_idx, cds_head_idx,
                                  cds_tail_idx, utr3_idx, start_idx, stop_idx,
                                  focus_region, auxBPs, output_file, algorithm,
                                  title='mRNA')
            img_paths.append((output_file, f'mRNA_{algorithm}'))

    # (panel name, sequence, figure title, gallery caption, message label)
    panels = (
        ("5'leader (30 nt)", full_sequence[:30], "5'leader (30 nt)", 'head_30', "head(30nt)"),
        ("5'UTR", utr5_seq, "5'UTR", 'utr5', "5'UTR"),
        ("CDS", cds_seq, "CDS", 'cds', "CDS"),
        ("3'UTR", utr3_seq, "3'UTR", 'utr3', "3'UTR"),
    )
    for panel, sequence, figure_title, caption, label in panels:
        if panel not in wanted:
            continue
        img_path, local_mfe, _ = draw_simple(sequence, title=figure_title)
        img_paths.append((img_path, caption))
        message += f"\n{label} MFE={local_mfe:.2f} kcal/mol"

    return img_paths, mfe, structure, message


def visualize_rna(utr5_seq, cds_seq, utr3_seq, structure):
    """Gradio callback: draw the mRNA and format the MFE for display.

    Returns:
        ``(images, mfe_text, structure, message)``; ``mfe_text`` is ``None``
        when no MFE was computed.
    """
    image_path, mfe, structure, message = generate_rna_structure(
        utr5_seq, cds_seq, utr3_seq, structure
    )
    # Compare against None explicitly: an MFE of exactly 0.0 is a valid result.
    mfe = f'MFE={mfe:.2f} kcal/mol' if mfe is not None else None
    return image_path, mfe, structure, message


def draw_rna_2d():
    """Build and return the Gradio Blocks application.

    Raises:
        ImportError: if gradio is not installed.
    """
    if gr is None:
        raise ImportError("gradio is required to build the web UI (pip install gradio)")

    with gr.Blocks(title="RNA结构可视化") as demo:
        gr.Markdown("# RNA结构可视化工具")
        gr.Markdown("使用VARNA可视化RNA二级结构,并高亮显示不同区域")

        with gr.Row():
            with gr.Column(scale=1):
                utr5_seq = gr.Textbox(label="5'UTR序列", value="AUGCCAUGAACAGCUAC",
                                      placeholder="输入5'UTR序列...")
                cds_seq = gr.Textbox(label="CDS序列", value="AUGCCAUGAACAGCUAC",
                                     placeholder="输入CDS序列...")
                utr3_seq = gr.Textbox(label="3'UTR序列", value="AUGCCAUGAACAGCUAC",
                                      placeholder="输入3'UTR序列...")
                structure = gr.Textbox(
                    label="二级结构",
                    value="...........((((.((((.((((........)))).))))...))))..",
                    placeholder="输入点括号表示的二级结构...",
                )
                submit_btn = gr.Button("生成结构图", variant="primary")

            with gr.Column():
                output_image = gr.Gallery(label="RNA结构图", interactive=False,
                                          object_fit="contain")
                mfe = gr.Markdown(label="MFE", value="")
                message = gr.Markdown(label="Message", value="")

        # Colour legend
        with gr.Accordion("颜色说明", open=False):
            gr.Markdown(_LEGEND_MARKDOWN)

        # Example data
        with gr.Accordion("示例数据", open=False):
            gr.Examples(
                examples=[
                    [
                        "AUGCCAUGAACAGCUAC",
                        "AUGCCAUGAACAGCUAC",
                        "AUGCCAUGAACAGCUAC",
                        "...........((((.((((.((((........)))).))))...))))..",
                    ],
                    [
                        "GGGAAAUUUCCC",
                        "AUGCCAUGAACAGCUAC",
                        "UUUAAAGGGCCC",
                        "((((....))))..(((.((((.......))))...)))..",
                    ],
                ],
                inputs=[utr5_seq, cds_seq, utr3_seq, structure],
            )

        # Submit handler
        submit_btn.click(
            visualize_rna,
            inputs=[utr5_seq, cds_seq, utr3_seq, structure],
            outputs=[output_image, mfe, structure, message],
        )

    return demo


# Run the application
if __name__ == "__main__":
    demo = draw_rna_2d()
    demo.launch(server_port=8080, debug=True)