选择信号加基因结构图

import pandas as pd
import matplotlib.pyplot as plt
from scipy.ndimage import gaussian_filter1d

# 数据准备
data = pd.read_table("site_fst_pi/TNFSF4.fst_pi.merge")
data


# 筛选指定范围的数据
filtered_data = data[(data['POS'] >= 40780000) & (data['POS'] <= 40900000)]

# 提取筛选后的数据
pos = filtered_data['POS']
fst = filtered_data['WEIR_AND_COCKERHAM_FST']
gd_pi = filtered_data['GD_PI']
hn_pi = filtered_data['HN_PI']

# 平滑数据
fst_smooth = gaussian_filter1d(fst, sigma=2)
gd_pi_smooth = gaussian_filter1d(gd_pi, sigma=2)
hn_pi_smooth = gaussian_filter1d(hn_pi, sigma=2)

# 基因结构数据
genes = [
    {"name": "TNFSF4", 
     "start": 40817340, 
     "end": 40838177, #98066623        98090010 
     "exons": [(40836244,40837022), 
               (40820756,40820804),
               (40817340,40819014),
              ],
     "strand": "+"},  # 使用 "+" 表示正链,"-" 表示负链
]

# 创建图形和子图布局
fig = plt.figure(figsize=(5, 5))  # 总尺寸宽为5,高为10
gs = fig.add_gridspec(3, 1, height_ratios=[4, 4, 2])  # 高度比例:4, 4, 2

# 第一张图:FST 折线图
ax1 = fig.add_subplot(gs[0, 0])
ax1.plot(pos, fst_smooth, label='Leiqiong_GD vs. Leiqiong_HN', color='#527CB5')
ax1.set_ylabel('Fst', fontsize=12)
ax1.legend(fontsize=6, loc='upper right')
ax1.grid(True, linestyle='--', alpha=0.7)
ax1.tick_params(labelbottom=False)  # 隐藏 X 轴刻度标签

# 第二张图:GD_PI 和 HN_PI 折线图
ax2 = fig.add_subplot(gs[1, 0], sharex=ax1)
ax2.plot(pos, gd_pi_smooth, label='Leiqiong_GD', color='#009384')
ax2.plot(pos, hn_pi_smooth, label='Leiqiong_HN', color='#ef6e56')
ax2.set_ylabel('θπ', fontsize=12)
ax2.legend(fontsize=6, loc='lower right')
ax2.grid(True, linestyle='--', alpha=0.7)
ax2.tick_params(labelbottom=False)  # 隐藏 X 轴刻度标签

# 第三张图:基因结构图
ax3 = fig.add_subplot(gs[2, 0], sharex=ax1)
for gene in genes:
    ax3.plot(
        [gene["start"], gene["end"]], [0.5, 0.5],
        color="green", linewidth=2, zorder=1
    )
    # 绘制外显子
    for exon in gene["exons"]:
        ax3.add_patch(mpatches.Rectangle(
            (exon[0], 0.4), exon[1] - exon[0], 0.2,
            color="green", zorder=2
        ))
    # 添加基因名称
    ax3.text(
        (gene["start"] + gene["end"]) / 2, 0.7,
        gene["name"], ha="center", fontsize=10
    )
    
    # 计算内含子位置并添加箭头
    exons_sorted = sorted(gene["exons"], key=lambda x: x[0])  # 按起始位置排序
    min_intron_length = 10000  # 设置最小内含子长度,避免标注过短的内含子
    for i in range(len(exons_sorted) - 1):
        intron_start = exons_sorted[i][1]
        intron_end = exons_sorted[i + 1][0]
        intron_length = intron_end - intron_start
        
        # 跳过长度小于最小内含子长度的内含子
        if intron_length < min_intron_length:
            continue
        
        intron_mid = (intron_start + intron_end) // 2  # 取内含子的中点
        
        # 根据转录方向添加箭头
        if gene["strand"] == "+":
            ax3.add_patch(FancyArrow(
                intron_mid, 0.5, 10000, 0,  # 起点、宽度和高度
                width=0.01, head_width=0.05, head_length=5000,
                length_includes_head=True, color="gray"
            ))
        elif gene["strand"] == "-":
            ax3.add_patch(FancyArrow(
                intron_mid, 0.5, -10000, 0,  # 起点、宽度和高度
                width=0.01, head_width=0.05, head_length=5000,
                length_includes_head=True, color="gray"
            ))

ax3.set_ylim(0, 1)
ax3.set_yticks([])
#ax3.set_xlabel("Position (Mb)", fontsize=12)
ax3.set_ylabel('Gene', fontsize=12)
ax3.set_xticks(np.arange(40780000, 40900000, 20000))
ax3.set_xticklabels([f"{x / 1e6:.2f}" for x in np.arange(40780000, 40900000, 20000)], fontsize=10)
#ax3.set_title("Gene Structure with Transcription Direction", fontsize=10)

# 调整布局
plt.tight_layout()
plt.savefig('TNFSF4.fst_pi_gene.pdf')

image.png