import pandas as pd
import matplotlib.pyplot as plt
from scipy.ndimage import gaussian_filter1d
# 数据准备
data = pd.read_table("site_fst_pi/TNFSF4.fst_pi.merge")
data
# 筛选指定范围的数据
filtered_data = data[(data['POS'] >= 40780000) & (data['POS'] <= 40900000)]
# 提取筛选后的数据
pos = filtered_data['POS']
fst = filtered_data['WEIR_AND_COCKERHAM_FST']
gd_pi = filtered_data['GD_PI']
hn_pi = filtered_data['HN_PI']
# 平滑数据
fst_smooth = gaussian_filter1d(fst, sigma=2)
gd_pi_smooth = gaussian_filter1d(gd_pi, sigma=2)
hn_pi_smooth = gaussian_filter1d(hn_pi, sigma=2)
# 基因结构数据
genes = [
{"name": "TNFSF4",
"start": 40817340,
"end": 40838177, #98066623 98090010
"exons": [(40836244,40837022),
(40820756,40820804),
(40817340,40819014),
],
"strand": "+"}, # 使用 "+" 表示正链,"-" 表示负链
]
# 创建图形和子图布局
fig = plt.figure(figsize=(5, 5)) # 总尺寸宽为5,高为10
gs = fig.add_gridspec(3, 1, height_ratios=[4, 4, 2]) # 高度比例:4, 4, 2
# 第一张图:FST 折线图
ax1 = fig.add_subplot(gs[0, 0])
ax1.plot(pos, fst_smooth, label='Leiqiong_GD vs. Leiqiong_HN', color='#527CB5')
ax1.set_ylabel('Fst', fontsize=12)
ax1.legend(fontsize=6, loc='upper right')
ax1.grid(True, linestyle='--', alpha=0.7)
ax1.tick_params(labelbottom=False) # 隐藏 X 轴刻度标签
# 第二张图:GD_PI 和 HN_PI 折线图
ax2 = fig.add_subplot(gs[1, 0], sharex=ax1)
ax2.plot(pos, gd_pi_smooth, label='Leiqiong_GD', color='#009384')
ax2.plot(pos, hn_pi_smooth, label='Leiqiong_HN', color='#ef6e56')
ax2.set_ylabel('θπ', fontsize=12)
ax2.legend(fontsize=6, loc='lower right')
ax2.grid(True, linestyle='--', alpha=0.7)
ax2.tick_params(labelbottom=False) # 隐藏 X 轴刻度标签
# 第三张图:基因结构图
ax3 = fig.add_subplot(gs[2, 0], sharex=ax1)
for gene in genes:
ax3.plot(
[gene["start"], gene["end"]], [0.5, 0.5],
color="green", linewidth=2, zorder=1
)
# 绘制外显子
for exon in gene["exons"]:
ax3.add_patch(mpatches.Rectangle(
(exon[0], 0.4), exon[1] - exon[0], 0.2,
color="green", zorder=2
))
# 添加基因名称
ax3.text(
(gene["start"] + gene["end"]) / 2, 0.7,
gene["name"], ha="center", fontsize=10
)
# 计算内含子位置并添加箭头
exons_sorted = sorted(gene["exons"], key=lambda x: x[0]) # 按起始位置排序
min_intron_length = 10000 # 设置最小内含子长度,避免标注过短的内含子
for i in range(len(exons_sorted) - 1):
intron_start = exons_sorted[i][1]
intron_end = exons_sorted[i + 1][0]
intron_length = intron_end - intron_start
# 跳过长度小于最小内含子长度的内含子
if intron_length < min_intron_length:
continue
intron_mid = (intron_start + intron_end) // 2 # 取内含子的中点
# 根据转录方向添加箭头
if gene["strand"] == "+":
ax3.add_patch(FancyArrow(
intron_mid, 0.5, 10000, 0, # 起点、宽度和高度
width=0.01, head_width=0.05, head_length=5000,
length_includes_head=True, color="gray"
))
elif gene["strand"] == "-":
ax3.add_patch(FancyArrow(
intron_mid, 0.5, -10000, 0, # 起点、宽度和高度
width=0.01, head_width=0.05, head_length=5000,
length_includes_head=True, color="gray"
))
ax3.set_ylim(0, 1)
ax3.set_yticks([])
#ax3.set_xlabel("Position (Mb)", fontsize=12)
ax3.set_ylabel('Gene', fontsize=12)
ax3.set_xticks(np.arange(40780000, 40900000, 20000))
ax3.set_xticklabels([f"{x / 1e6:.2f}" for x in np.arange(40780000, 40900000, 20000)], fontsize=10)
#ax3.set_title("Gene Structure with Transcription Direction", fontsize=10)
# 调整布局
plt.tight_layout()
plt.savefig('TNFSF4.fst_pi_gene.pdf')
