[ ]:

[1]:
import matplotlib.pyplot as plt
import networkx as nx
import pandas as pd
import numpy as np
import matplotlib as mpl
mpl.rcParams['pdf.fonttype']=42
mpl.rcParams['ps.fonttype']=42

import numpy as np
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
%matplotlib inline

import scanpy as sc
import anndata as ad


from scipy.cluster import hierarchy
from scipy.spatial import distance
from collections import defaultdict
/Users/yuanzan/anaconda3/lib/python3.8/site-packages/pandas/core/computation/expressions.py:20: UserWarning: Pandas requires version '2.7.3' or newer of 'numexpr' (version '2.7.1' currently installed).
  from pandas.core.computation.check import NUMEXPR_INSTALLED
[2]:
grnboost = pd.read_table("../../data/grn/TFR_grnboost2.csv", sep="\t")
adata = ad.read_h5ad("../../data/CC_DD_Exp_energy_sugar.h5ad")
[3]:
Suc_TFs = grnboost.query('target=="Suc" and importance>=7')
Suc_TFs_modules = grnboost[grnboost["TF"].isin(Suc_TFs['TF'])].query('target!="Suc"')
Suc_TFs_modules = pd.concat([Suc_TFs_modules, Suc_TFs], axis=0)

Suc_TFs_modules["TF_type"] = adata.var.loc[Suc_TFs_modules["TF"], "type"].to_list()
Suc_TFs_modules["target_type"] = adata.var.loc[Suc_TFs_modules["target"], "type"].to_list()
Suc_TFs_modules = Suc_TFs_modules.query('target_type!="energy"')

Suc_knowledge = pd.read_table("Suc_knowledge_related_gene.txt")
Suc_knowledge.index = Suc_knowledge['ID']
#Suc_knowledge = Suc_knowledge[Suc_knowledge["ID"].isin(Suc_TFs_modules['target'])]

Suc_TFs_modules["target_group"] = None
Suc_TFs_modules["target_symbol"] = None

Suc_TFs_modules_inKnowledge = Suc_TFs_modules[Suc_TFs_modules['target'].isin(Suc_knowledge["ID"])]

Suc_TFs_modules.loc[Suc_TFs_modules_inKnowledge.index, "target_group"] = Suc_knowledge.loc[Suc_TFs_modules_inKnowledge['target'], "group"].to_list()
Suc_TFs_modules.loc[Suc_TFs_modules_inKnowledge.index, "target_symbol"] = Suc_knowledge.loc[Suc_TFs_modules_inKnowledge['target'], "symbol"].to_list()

Suc_TFs_modules_all = Suc_TFs_modules.copy()


Suc_TFs_modules = Suc_TFs_modules.query('importance>=20 or target=="Suc"')
print(Suc_TFs_modules.shape)
Suc_TFs_modules.head(2)

(722, 7)
[3]:
TF target importance TF_type target_type target_group target_symbol
12 Solyc02g082110.4 Solyc01g110290.3 73.238874 TF gene None None
24 Solyc02g082110.4 Solyc09g074100.4 68.823340 TF gene None None
[4]:
Suc_TFs_modules.query('target_group == target_group')
[4]:
TF target importance TF_type target_type target_group target_symbol
7365 Solyc07g026680.2 Solyc12g008510.2 30.169215 TF gene metabolism SlHXK3
8121 Solyc07g026680.2 Solyc04g081400.3 29.338428 TF gene metabolism SlHXK4
[5]:
#Suc_TFs_modules_all = grnboost[grnboost["TF"].isin(Suc_TFs['TF'])].query('target!="Suc"')
#Suc_TFs_modules_all[]
[6]:
#ax = sns.boxplot(Suc_TFs_modules_all.query('target_group == target_group')['importance'])
#ax.set_xlim(0,1)
[7]:
gene_TF_type = pd.read_table("../cluster_TFs/CCDD_cluster_gene7860_type.csv", sep=",")
gene_TF_type2 = gene_TF_type[gene_TF_type["TF-Family"] != "--"].sort_values(["TF-Category", "TF-Family"])
gene_TF_type2['TF-Family2'] = gene_TF_type2['TF-Family'].str.split("\/", expand=True)[0]
gene_TF_type2['count'] = 1
gene_TF_type2['TForder'] = gene_TF_type2.groupby(['TF-Family2'])['count'].cumsum()

gene_TF_type2['TF-Family2'] = "SL" + gene_TF_type2['TF-Family2'] + "_" + gene_TF_type2['TForder'].astype(str)
gene_TF_type2[["gene", "TF-Category", "TF-Classification", "TF-Family", "TF-Family2"]].to_csv("696_TFR_in_7860gene_rename.csv", index=False)
gene_TF_type2.index = gene_TF_type2['gene']
gene_TF_type2.head(2)
[7]:
gene TF-Family TF-Category TF-Classification TF-Family2 count TForder
gene
Solyc02g092050.3 Solyc02g092050.3 AP2/ERF-AP2 TF AP2/ERF->AP2/ERF-AP2 SLAP2_1 1 1
Solyc04g077490.3 Solyc04g077490.3 AP2/ERF-AP2 TF AP2/ERF->AP2/ERF-AP2 SLAP2_2 1 2
[8]:
Suc_TFs_modules['TF_symbol'] = None
Suc_TFs_modules["TF_symbol"] = gene_TF_type2.loc[Suc_TFs_modules["TF"].to_list(), "TF-Family2"].to_list()
[9]:
Suc_TFs_modules.shape
[9]:
(722, 8)
[10]:
gene_TF_type2.head(2)
[10]:
gene TF-Family TF-Category TF-Classification TF-Family2 count TForder
gene
Solyc02g092050.3 Solyc02g092050.3 AP2/ERF-AP2 TF AP2/ERF->AP2/ERF-AP2 SLAP2_1 1 1
Solyc04g077490.3 Solyc04g077490.3 AP2/ERF-AP2 TF AP2/ERF->AP2/ERF-AP2 SLAP2_2 1 2
[11]:
Suc_knowledge33 = Suc_knowledge[Suc_knowledge.index.isin(gene_TF_type['gene'])]
#Suc_knowledge33['new_symbol']
[12]:
Suc_knowledge33.to_csv("Suc_knowledge33.csv")
[13]:
Suc_known_gene_regulon = Suc_TFs_modules_all.copy()
Suc_known_gene_regulon = Suc_known_gene_regulon[Suc_known_gene_regulon["target"].isin(Suc_knowledge33.index)]
[14]:
Suc_known_gene_regulon["meta_group"] = Suc_knowledge33.loc[Suc_known_gene_regulon["target"], "group"].to_list()
Suc_known_gene_regulon["TF_symbol"] = gene_TF_type2.loc[Suc_known_gene_regulon["TF"].to_list(), "TF-Family2"].to_list()
[15]:
Suc_known_gene_regulon.query('importance > 10').groupby(["TF_symbol", "meta_group"]).count()['target_symbol']
[15]:
TF_symbol         meta_group
SLC3H_5           metabolism    2
                  signaling     1
                  transport     1
SLMYB-related_11  transport     1
SLMYB-related_12  metabolism    2
SLPHD_4           metabolism    1
SLbHLH_1          transport     1
Name: target_symbol, dtype: int64
[16]:
Suc_known_gene_regulon_mat = Suc_known_gene_regulon.pivot(index="TF_symbol", columns="target_symbol", values="importance").fillna(0)
[17]:
col_color = pd.DataFrame(index=Suc_known_gene_regulon_mat.columns)
Suc_knowledge33_2 = Suc_knowledge33.copy()
Suc_knowledge33_2.index = Suc_knowledge33_2["symbol"]

col_color['Gene function'] = Suc_knowledge33_2.loc[col_color.index, "group"].to_list()

ccolors = sns.color_palette("Paired", len(col_color['Gene function'].unique())).as_hex()
lut = dict(zip(col_color['Gene function'].unique(), ccolors))
col_color['Pathway'] = col_color['Gene function'].map(lut)
col_color['Gene function'] = col_color['Gene function'].str.capitalize()
[ ]:

[18]:
#fig, ax = plt.subplots(1,1, figsize=(7,6))
import matplotlib.colors as colors


plt.rcParams["figure.figsize"] = [8, 8.2]
plt.rcParams["figure.autolayout"] = True

h = sns.clustermap(Suc_known_gene_regulon_mat, figsize=(10, 4),
                   cmap=sns.cubehelix_palette(start=2, rot=0, dark=0, light=.95, reverse=False, as_cmap=True),
               dendrogram_ratio=0.1,
               cbar_pos=(1, 0.7, 0.02, 0.2),
               col_colors=col_color[['Pathway']],
               #norm=colors.CenteredNorm(vcenter=0, halfrange=None, clip=False)
              )
"""
ax = plt.gca()

for i, row in TF_cols.iterrows():
    ax.scatter(None,None, label=row['TFFs'], color=row['color'])

ax.legend(loc=5)

ax.margins(0.15)
plt.axis("off")
plt.tight_layout()
"""

ax = plt.gca()
for i, row in col_color.drop_duplicates().iterrows():
    ax.scatter(None,None, color=row['Pathway'], label=row['Gene function'], marker='s')

ax.legend(loc="right", title='Pathway', bbox_to_anchor=(6.7, -0.8), ncol=1)
#h.add_legend()
#plt.tight_layout()

h.savefig("Suc_related_gene_TF_regulon_module_importance.pdf")

/Users/yuanzan/anaconda3/lib/python3.8/site-packages/seaborn/axisgrid.py:65: UserWarning: Tight layout not applied. tight_layout cannot make axes width small enough to accommodate all axes decorations
  self.figure.savefig(*args, **kwargs)
/Users/yuanzan/anaconda3/lib/python3.8/site-packages/IPython/core/events.py:89: UserWarning: Tight layout not applied. tight_layout cannot make axes width small enough to accommodate all axes decorations
  func(*args, **kwargs)
/Users/yuanzan/anaconda3/lib/python3.8/site-packages/IPython/core/pylabtools.py:132: UserWarning: Tight layout not applied. tight_layout cannot make axes width small enough to accommodate all axes decorations
  fig.canvas.print_figure(bytes_io, **kw)
../../_images/notebooks_GRN_TF_grn_network_19_1.png
[ ]:

[20]:
plt.rcParams["figure.figsize"] = [3, 3]
ax = sns.boxplot(Suc_known_gene_regulon['importance'])
ax.set_xlim(0, 1)
/Users/yuanzan/anaconda3/lib/python3.8/site-packages/seaborn/_decorators.py:36: FutureWarning: Pass the following variable as a keyword arg: x. From version 0.12, the only valid positional argument will be `data`, and passing other arguments without an explicit keyword will result in an error or misinterpretation.
  warnings.warn(
[20]:
(0.0, 1.0)
../../_images/notebooks_GRN_TF_grn_network_21_2.png
[ ]:

[21]:
def create_hc(G):
    """Creates hierarchical cluster of graph G from distance matrix"""
    path_length = nx.all_pairs_shortest_path_length(G)
    distances = np.zeros((len(G), len(G)))
    for u, p in path_length:
        for v, d in p.items():
            distances[u][v] = d
    # Create hierarchical cluster
    Y = distance.squareform(distances)
    Z = hierarchy.complete(Y)  # Creates HC using farthest point linkage
    # This partition selection is arbitrary, for illustrive purposes
    membership = list(hierarchy.fcluster(Z, t=1.15))
    # Create collection of lists for blockmodel
    partition = defaultdict(list)
    for n, p in zip(list(range(len(G))), membership):
        partition[p].append(n)
    return list(partition.values())


def subG(df):
    G = nx.Graph()
    G.clear()
    nodes = list(set(pd.concat([df["TF"], df["target"]], axis=0)))
    subG = nx.Graph()

    for i in nodes:
        G.add_node(i)
    for i,row in df.iterrows():
        G.add_edge(row['TF'], row['target'], length=row['importance'], weight=row['importance'])
    return G

def network_G_pos(grn_sub):
    G = nx.Graph()
    G.clear()

    U = nx.Graph()
    U.clear()

    TF_t = grn_sub[["TF", 'TF_type']]
    TF_t.columns = ['node', 'type']

    target_t = grn_sub[["target", 'target_type']]
    target_t.columns = ['node', 'type']

    nodes = pd.concat([TF_t, target_t], axis=0).drop_duplicates(keep='first')

    for i, row in nodes.iterrows():
        G.add_node(row['node'])

    for i, row in grn_sub.iterrows():
        G.add_edge(row['TF'], row['target'], length=row['importance'], weight=row['importance'])


    for i in grn_sub["TF"].unique():
        subdf = grn_sub[grn_sub["TF"]==i]
        sG = subG(subdf)
        U = nx.disjoint_union(U, sG)
        #break

    #### draw graph ####
    plt.figure(1, figsize=(8, 6))
    # layout graphs with positions using graphviz neato

    H = G.subgraph(next(nx.connected_components(G)))
    # Makes life easier to have consecutively labeled integer nodes
    H = nx.convert_node_labels_to_integers(H)
    # Create parititions with hierarchical clustering
    partitions = create_hc(H)
    # Build blockmodel graph
    BM = nx.quotient_graph(H, partitions, relabel=True)

    return nodes, U, G, H


#nodes, U, G, H = network_G_pos(Suc_TFs_modules)


def create_hc(G):
    """Creates hierarchical cluster of graph G from distance matrix"""
    path_length = nx.all_pairs_shortest_path_length(G)
    distances = np.zeros((len(G), len(G)))
    for u, p in path_length:
        for v, d in p.items():
            distances[u][v] = d
    # Create hierarchical cluster
    Y = distance.squareform(distances)
    Z = hierarchy.complete(Y)  # Creates HC using farthest point linkage
    # This partition selection is arbitrary, for illustrive purposes
    membership = list(hierarchy.fcluster(Z, t=1.15))
    # Create collection of lists for blockmodel
    partition = defaultdict(list)
    for n, p in zip(list(range(len(G))), membership):
        partition[p].append(n)
    return list(partition.values())


def subG(df):
    G = nx.Graph()
    G.clear()
    nodes = list(set(pd.concat([df["TF"], df["target"]], axis=0)))
    subG = nx.Graph()

    for i in nodes:
        G.add_node(i)
    for i,row in df.iterrows():
        G.add_edge(row['TF'], row['target'], length=row['importance'], weight=row['importance'])
    return G


[22]:
def network_G_pos2(grn_sub):
    G = nx.Graph()
    G.clear()

    #TF_t = grn_sub[["TF", 'TF_type']]
    TF_t = grn_sub[["TF_symbol", 'TF_type']]
    TF_t.columns = ['node', 'type']

    target_t = grn_sub[["target", 'target_type']]
    target_t.columns = ['node', 'type']

    nodes = pd.concat([TF_t, target_t], axis=0).drop_duplicates(keep='first')
    nodes.index = range(nodes.shape[0])

    for i, row in nodes.iterrows():
        G.add_node(row['node'])

    for i, row in grn_sub.iterrows():
        #G.add_edge(row['TF'], row['target'], length=row['importance'], weight=row['importance'])
        G.add_edge(row['TF_symbol'], row['target'], length=row['importance'], weight=row['importance'])

    """
    H = G.subgraph(next(nx.connected_components(G)))
    H = nx.convert_node_labels_to_integers(H)
    # Create parititions with hierarchical clustering
    partitions = create_hc(H)
    # Build blockmodel graph
    BM = nx.quotient_graph(H, partitions, relabel=True)
    """
    return nodes, G



nodes, G = network_G_pos2(Suc_TFs_modules)

[ ]:

[23]:
grnboost_sub = Suc_TFs_modules
pos = nx.nx_agraph.graphviz_layout(G, prog="neato")


plt.figure(1, figsize=(10, 8))

def plot_target_node(GG, poss, nodeDF, nodetype="gene", nodecolor="#2E7EB7", node_size=20, alpha=0.4):
    nodeDF = nodeDF[nodeDF["type"]==nodetype]
    nodelist = nodeDF['node'].to_list()
    #nodelist = nodeDF.index
    nodes_s = nx.draw_networkx_nodes(G, pos, nodelist=nodelist, node_size=node_size, node_color=nodecolor, alpha=alpha)

def plot_TF_node(GG, poss, nodeDF, nodetype="gene", nodecolor="#2E7EB7", node_size=20, alpha=0.4):
    nodeDF = nodeDF[nodeDF["type"]==nodetype]
    nodelist = nodeDF['node'].to_list()
    #nodelist = nodeDF.index
    nodes_s = nx.draw_networkx_nodes(G, pos, nodelist=nodelist, node_shape = 'h', node_size=node_size, node_color=nodecolor, alpha=alpha)


nodes1 = plot_target_node(G, pos, nodes, nodetype="gene", nodecolor="#2E7EB7", node_size=10)
nodes2 = plot_target_node(G, pos, nodes, nodetype="sugar", nodecolor="#581642", node_size=200)
#nodes3 = plot_target_node(G, pos, nodes.query('type=="energy"'), nodetype="energy", nodecolor="#DBB53E", node_size=10)
nodes4 = plot_TF_node(G, pos, nodes, nodetype="TF", nodecolor="#ED3833", node_size=100)
nodes5 = plot_TF_node(G, pos, nodes, nodetype="TR", nodecolor="#DBB53E", node_size=100)


def plot_TF_node(GG, poss, grn_DF, nodeDF, nodetype="sugar", nodecolor="red", node_size=20, alpha=0.4):
    nodeDF = nodeDF[nodeDF["type"]==nodetype]
    nodelist = nodeDF['node'].to_list()

    #target_count = pd.DataFrame(grn_DF.groupby(["TF"]).sum()['cc'])
    target_count = pd.DataFrame(grn_DF.groupby(["TF_symbol"]).sum()['cc'])
    aa = nodeDF.index.isin(target_count.index)==False
    notIntarget_count = nodeDF.index[aa].to_list()

    notIntarget_count = pd.DataFrame({'cc':[1]*len(notIntarget_count)}, index=notIntarget_count)
    target_count = target_count.append(notIntarget_count)

    #target_count.loc[notIntarget_count, 'cc'] = [1]
    #print(target_count)

    node_size = node_size * target_count.loc[nodeDF.index, 'cc']/6
    nodes_s = nx.draw_networkx_nodes(G, pos, nodelist=nodelist, node_size=node_size,
                                     node_color=nodecolor,
                                     #label=nodeDF.index.to_list(),
                                     alpha=alpha)

#nodes6 = plot_TF_node(H, pos, grnboost_sub, nodes.query('type=="TF" or type=="TR"'), nodetype="TF/TR", nodecolor="#581642", node_size=20)
#nodes7 = plot_TF_node(H, pos, grnboost_sub, nodes_df, nodetype="energy", nodecolor="#DBB53E", node_size=20)

"""
top7TFs = grnboost_sub[['TF', 'targetN']].sort_values(['targetN'], ascending=False).drop_duplicates().head(10)['TF'].to_list()
top7TFs = nodes_df.loc[top7TFs,:]

bbox={'facecolor': 'w', #填充色
      'edgecolor': 'None',#外框色
      'alpha': 0.5, #框透明度
      'pad': 2,#本文与框周围距离
     }

nx.draw_networkx_labels(H, pos, labels={row['range']:i for i,row in top7TFs.iterrows()},
                        font_size=10,
                        font_family="sans-serif",
                        font_color='k',
                        bbox=bbox
                        )


"""

nx.draw_networkx_labels(G, pos, labels={i:i for i in grnboost_sub["TF_symbol"].unique()},
                        font_size=10,
                        font_family="sans-serif",
                        font_color='k',
                        #bbox=bbox
                        )


edge_colors = range(int(grnboost_sub['importance'].min()), int(grnboost_sub['importance'].max()))


cmap = plt.cm.plasma


# edge_color="gainsboro",
edges = nx.draw_networkx_edges(G, pos, alpha=0.4, edge_color="gainsboro")

ax = plt.gca()
ax.scatter(None,None, label='Suc', color='#581642')
#ax.scatter(None,None, label='energy', color='#DBB53E')
ax.scatter(None,None, label='gene', color='#2E7EB7')
ax.scatter(None,None, label='TF', color='#ED3833')
ax.scatter(None,None, label='TR', color='#DBB53E')
ax.legend(loc=5)
ax.margins(0.15)
plt.axis("off")
plt.tight_layout()


#ax.set_axis_off()
#plt.show()
plt.savefig("Suc_related_regulon.pdf")

../../_images/notebooks_GRN_TF_grn_network_26_0.png
[230]:
#Suc_TFs_modules.to_csv("Suc_TFs_modules_20230314.csv", index=False)
[ ]:

[24]:
Suc_known_gene_regulon2plot = Suc_known_gene_regulon.query('importance>0.1')
[25]:
def network_G_pos3(grn_sub):
    G = nx.Graph()
    G.clear()

    TF_t = grn_sub[["TF_symbol", 'TF_type']]
    TF_t.columns = ['node', 'type']

    target_t = grn_sub[["target_symbol", 'target_group']]
    target_t.columns = ['node', 'type']

    nodes = pd.concat([TF_t, target_t], axis=0).drop_duplicates(keep='first')
    nodes.index = range(nodes.shape[0])
    #print(nodes)
    for i in nodes:
        G.add_node(i)

    for i, row in grn_sub.iterrows():
        #G.add_edge(row['TF'], row['target'], length=row['importance'], weight=row['importance'])
        G.add_edge(row['TF_symbol'], row['target_symbol'], length=row['importance'], weight=row['importance'])

    return nodes, G

nodes2, G2 = network_G_pos3(Suc_known_gene_regulon2plot)

[26]:
sns.color_palette("light:#5A9", as_cmap=True)
[26]:
blend
blend colormap
under
bad
over
[31]:
pos2 = nx.nx_agraph.graphviz_layout(G2, prog="twopi")


plt.figure(1, figsize=(7, 6.5))

def plot_target_node2(GG, poss, nodeDF, nodetype="gene", nodecolor="#2E7EB7", node_size=20, alpha=0.4):
    #print(nodeDF).shape
    #nodeDF = nodeDF.copy()
    nodeDF = nodeDF[nodeDF["type"]==nodetype]
    nodelist = nodeDF['node'].to_list()
    #nodelist = nodeDF.index
    nodes_s = nx.draw_networkx_nodes(GG, poss, nodelist=nodelist, node_size=node_size, node_color=nodecolor, alpha=alpha)

def plot_TF_node2(GG, poss, nodeDF, nodetype="gene", nodecolor="#2E7EB7", node_size=20, alpha=0.4):
    #print(nodeDF).shape
    #nodeDF = nodeDF.copy()
    nodeDF = nodeDF[nodeDF["type"]==nodetype]
    nodelist = nodeDF['node'].to_list()
    #nodelist = nodeDF.index
    nodes_s = nx.draw_networkx_nodes(GG, poss, nodelist=nodelist, node_shape = 'h',
                                     node_size=node_size, node_color=nodecolor, alpha=alpha)

nodes_1 = plot_TF_node2(G2, pos2, nodes2, nodetype="TF", nodecolor="#ED3833", node_size=400)
nodes_2 = plot_TF_node2(G2, pos2, nodes2, nodetype="TR", nodecolor="#DBB53E", node_size=400)

nodes_3 = plot_target_node2(G2, pos2, nodes2, nodetype="metabolism", nodecolor="#2E7EB7", node_size=300)
nodes_4 = plot_target_node2(G2, pos2, nodes2, nodetype="transport", nodecolor="#07CCD2", node_size=300)
nodes_5 = plot_target_node2(G2, pos2, nodes2, nodetype="signaling", nodecolor="#FDCCD6", node_size=300)



nx.draw_networkx_labels(G2, pos2, labels={i:i for i in nodes2["node"].unique()},
                        font_size=5,
                        font_family="sans-serif",
                        font_color='k',
                        #bbox=bbox
                        )


edge_colors = range(int(Suc_known_gene_regulon2plot['importance'].min()), int(Suc_known_gene_regulon2plot['importance'].max()))


cmap = plt.cm.plasma


# edge_color="gainsboro",
#edges2 = nx.draw_networkx_edges(G2, pos2, alpha=0.4, edge_color="gainsboro")

#nx.draw_networkx_edges(G2, pos2, alpha=0.4, edge_color=cmap)
edge_colors = Suc_known_gene_regulon2plot['importance']

cmap = sns.color_palette("ch:s=.25,rot=-.25", as_cmap=True)
#sns.color_palette("light:#5A9", as_cmap=True)
#plt.cm.Blues
options = {
    "edge_color": edge_colors,
    "width": 1,
    "edge_cmap": cmap,
}

edges = nx.draw_networkx_edges(G2, pos2, **options)
"""
pc = mpl.collections.PatchCollection(edges, cmap=cmap)
pc.set_array(edge_colors)

ax = plt.gca()
ax.set_axis_off()
plt.colorbar(pc, ax=ax)
"""


ax = plt.gca()


ax.scatter(None,None, label='signaling', color='#FDCCD6')
ax.scatter(None,None, label='transport', color='#07CCD2')
ax.scatter(None,None, label='metabolism', color='#2E7EB7')
ax.scatter(None,None, label='TF', color='#ED3833')
ax.scatter(None,None, label='TR', color='#DBB53E')
#ax.legend(loc=5)
ax.legend(loc="right", title='', bbox_to_anchor=(0.9, 0.3), ncol=1)

ax.margins(0.15)
plt.axis("off")
plt.tight_layout()

#ax.set_axis_off()
#plt.show()
plt.savefig("Suc_related_gene_TF_regulon_module_importance_network.pdf")

../../_images/notebooks_GRN_TF_grn_network_32_0.png

regulon heatmap#

[184]:
Suc_known_gene_regulon2plot
[184]:
TF target importance TF_type target_type target_group target_symbol meta_group TF_symbol
7365 Solyc07g026680.2 Solyc12g008510.2 30.169215 TF gene metabolism SlHXK3 metabolism SLMYB-related_12
8121 Solyc07g026680.2 Solyc04g081400.3 29.338428 TF gene metabolism SlHXK4 metabolism SLMYB-related_12
25753 Solyc02g082110.4 Solyc04g064630.3 18.921355 TF gene transport SlSWEET7 transport SLC3H_5
27260 Solyc02g082110.4 Solyc04g081400.3 18.414346 TF gene metabolism SlHXK4 metabolism SLC3H_5
27497 Solyc05g042030.3 Solyc08g042000.3 18.332312 TR gene metabolism SlSPS1 metabolism SLPHD_4
... ... ... ... ... ... ... ... ... ...
2007721 Solyc01g059950.1 Solyc03g121070.3 0.108084 TF gene metabolism SlHXK1 metabolism SLGRAS_16
2024254 Solyc07g026680.2 Solyc03g115700.3 0.106125 TF gene signaling SlSnRK1.2 signaling SLMYB-related_12
2046807 Solyc12g008830.3 Solyc02g071520.3 0.103508 TF gene transport SlSWEET3 transport SLC2C2-GATA_12
2064462 Solyc07g026680.2 Solyc03g097600.3 0.101504 TF gene transport SlSWEET5 transport SLMYB-related_12
2070101 Solyc05g042030.3 Solyc03g097610.3 0.100876 TR gene transport SlSWEET6 transport SLPHD_4

153 rows × 9 columns

[212]:
suc_tfs1 = pd.DataFrame(Suc_known_gene_regulon2plot[['TF', 'TF_symbol']])
suc_tfs1.columns = ['gene', 'symbol']
suc_target1 = pd.DataFrame(Suc_known_gene_regulon2plot[['target', 'target_symbol']])
suc_target1.columns = ['gene', 'symbol']

suc_tf_targets = pd.concat([suc_tfs1, suc_target1]).drop_duplicates().reset_index()
suc_tf_targets.loc[-1, :] = [11, "Suc", "Suc"]
suc_tf_targets.index = suc_tf_targets['gene']
[220]:
suc_tf_targets_genes = suc_tf_targets['gene'].to_list()
#suc_tf_targets_genes.append('Suc')

suc_tf_targets_genes_EXP = adata.to_df().loc[:, suc_tf_targets_genes]
suc_tf_targets_genes_EXP.columns = suc_tf_targets.loc[suc_tf_targets_genes_EXP.columns, "symbol"].to_list()
suc_tf_targets_genes_EXP = suc_tf_targets_genes_EXP.T

[ ]:

[238]:
adata.obs['sample'] = adata.obs.index
adata.obs['conditon_HAG'] = adata.obs['sample'].str.split("h-", expand=True)[0]
adata.obs['conditon'] = adata.obs['conditon_HAG'].str.split("[0-9]", expand=True)[0]
adata.obs['HAG'] = adata.obs['conditon_HAG'].str.lstrip('CD').astype(int)
adata.obs.head(2)
[238]:
sample conditon_HAG HAG conditon
sample
CD0h-1 CD0h-1 CD0 0 CD
CD0h-2 CD0h-2 CD0 0 CD
[255]:
p_df_meta1 = pd.concat([adata.obs.query('conditon=="C"'),
           adata.obs.query('conditon=="D"'),
           adata.obs.query('conditon=="CC"'),
           adata.obs.query('conditon=="DD"'),
           adata.obs.query('conditon=="CD"')], axis=0)
[ ]:

[263]:
p1_df = pd.concat([suc_tf_targets_genes_EXP.T.loc[p_df_meta1.index, :], p_df_meta1[["conditon_HAG"]]], axis=1).groupby(['conditon_HAG']).mean()
p1_df = p1_df.loc[p_df_meta1[['conditon_HAG']].drop_duplicates()['conditon_HAG'],:]
[277]:
import os
os.getcwd()
[277]:
'/Users/yuanzan/Documents/github/seqyuan/tomato_graft_omics/notebooks/GRN'
[275]:
#fig, ax = plt.subplots(1,1, figsize=(7,6))
import matplotlib.colors as colors


#plt.rcParams["figure.figsize"] = [8, 8.2]
plt.rcParams["figure.autolayout"] = True

h = sns.clustermap(p1_df.T, figsize=(15,8.2), cmap='vlag',
               dendrogram_ratio=0.1, col_cluster=False,
               cbar_pos=(1, 0.7, 0.02, 0.2), z_score=0,
               #col_colors=homo_metadata_31[['Class']],
               #norm=colors.CenteredNorm(vcenter=0, halfrange=None, clip=False)
              )

"""
ax = plt.gca()
for i, row in homo_metadata_31.drop_duplicates().iterrows():
    ax.scatter(None,None, color=row['Class'], label=row['class_origin'], marker='s')

ax.legend(loc="right", title='Class', bbox_to_anchor=(5, -0.8), ncol=1)
#h.add_legend()
#plt.tight_layout()
"""
h.savefig("suc_genes_exp.pdf")
/Users/yuanzan/anaconda3/lib/python3.8/site-packages/seaborn/axisgrid.py:65: UserWarning: Tight layout not applied. tight_layout cannot make axes width small enough to accommodate all axes decorations
  self.figure.savefig(*args, **kwargs)
../../_images/notebooks_GRN_TF_grn_network_43_1.png

CoA#

[332]:
coa_related_genes = """Solyc01g059880.3
Solyc01g073740.4
Solyc01g101040.3
Solyc04g039670.3
Solyc05g005160.3
Solyc07g055840.3
Solyc12g011000.2
Solyc12g099260.2""".split("\n")
[334]:
_TFs = grnboost.query('target=="Acetyl-CoA" and importance>=7')
print(_TFs.shape)

_TFs_modules = grnboost[grnboost["TF"].isin(_TFs['TF'])].query('target!="Acetyl-CoA"')
_TFs_modules = _TFs_modules.query('importance>=3')
print(_TFs_modules.shape)

_TFs_modules = _TFs_modules[_TFs_modules['target'].isin(coa_related_genes)]

print(_TFs_modules.shape)
(8, 3)
(4920, 3)
(9, 3)
[343]:
_TFs_modules_genes = _TFs_modules[['target']].drop_duplicates()['target'].to_list()
_TFs_modules_genes.append('Solyc02g082110.4')
_TFs_modules_genes.append('Acetyl-CoA')
_TFs_modules_genes_EXP = adata.to_df().loc[:, _TFs_modules_genes]

[344]:
p_coa_df = pd.concat([_TFs_modules_genes_EXP.loc[p_df_meta1.index, :], p_df_meta1[["conditon_HAG"]]], axis=1).groupby(['conditon_HAG']).mean()
p_coa_df = p_coa_df.loc[p_df_meta1[['conditon_HAG']].drop_duplicates()['conditon_HAG'],:]
[345]:
import matplotlib.colors as colors

plt.rcParams["figure.autolayout"] = True

h = sns.clustermap(p_coa_df.T, figsize=(15, 2), cmap='vlag',
               dendrogram_ratio=0.1, col_cluster=False, row_cluster=False,
               cbar_pos=(1, 0.7, 0.01, 0.2), z_score=0,
               #col_colors=homo_metadata_31[['Class']],
               #norm=colors.CenteredNorm(vcenter=0, halfrange=None, clip=False)
              )
h.savefig("CoA_genes_exp.pdf")
/Users/yuanzan/anaconda3/lib/python3.8/site-packages/seaborn/axisgrid.py:65: UserWarning: Tight layout not applied. tight_layout cannot make axes width small enough to accommodate all axes decorations
  self.figure.savefig(*args, **kwargs)
/Users/yuanzan/anaconda3/lib/python3.8/site-packages/IPython/core/events.py:89: UserWarning: Tight layout not applied. tight_layout cannot make axes width small enough to accommodate all axes decorations
  func(*args, **kwargs)
/Users/yuanzan/anaconda3/lib/python3.8/site-packages/IPython/core/pylabtools.py:132: UserWarning: Tight layout not applied. tight_layout cannot make axes width small enough to accommodate all axes decorations
  fig.canvas.print_figure(bytes_io, **kw)
../../_images/notebooks_GRN_TF_grn_network_49_1.png
[ ]:

[ ]:

network#

[99]:
CoA_TFs = grnboost.query('target=="Acetyl-CoA" and importance>=7')
print(CoA_TFs.shape)

CoA_TFs_modules = grnboost[grnboost["TF"].isin(CoA_TFs['TF'])].query('target!="Acetyl-CoA"')
CoA_TFs_modules = CoA_TFs_modules.query('importance>=3')
print(CoA_TFs_modules.shape)
CoA_TFs_modules = pd.concat([CoA_TFs_modules, CoA_TFs], axis=0)

CoA_TFs_modules["TF_type"] = adata.var.loc[CoA_TFs_modules["TF"], "type"].to_list()
CoA_TFs_modules["target_type"] = adata.var.loc[CoA_TFs_modules["target"], "type"].to_list()
#CoA_TFs_modules = CoA_TFs_modules.query('target_type!="energy"')

#CoA_TFs_modules.head(2)
(8, 3)
(4920, 3)
[100]:
CoA_TFs_modules['TF_symbol'] = None
CoA_TFs_modules["TF_symbol"] = gene_TF_type2.loc[CoA_TFs_modules["TF"].to_list(), "TF-Family2"].to_list()
[101]:
#CoA_TFs_modules.query('importance > 20')
CoA_TFs_modules = CoA_TFs_modules.query('importance>=30 or target=="Acetyl-CoA"')
print(CoA_TFs_modules.shape)

CoA_TFs_modules.loc[CoA_TFs_modules[CoA_TFs_modules['target_type']=="energy"].index, "target_type"] = "Acetyl-CoA"

CoA_TFs_modules.head(2)

(192, 6)
[101]:
TF target importance TF_type target_type TF_symbol
12 Solyc02g082110.4 Solyc01g110290.3 73.238874 TF gene SLC3H_5
24 Solyc02g082110.4 Solyc09g074100.4 68.823340 TF gene SLC3H_5
[134]:
CoA_TFs_modules.to_csv("CoA_TFs_modules_20230316.csv")
[102]:
nodes_CoA, G_CoA = network_G_pos2(CoA_TFs_modules)
[125]:
pos_CoA = nx.nx_agraph.graphviz_layout(G_CoA, prog="twopi")
[133]:
plt.figure(1, figsize=(7, 6))

def plot_target_node2(GG, poss, nodeDF, nodetype="gene", nodecolor="#2E7EB7", node_size=20, alpha=0.4):
    #print(nodeDF).shape
    #nodeDF = nodeDF.copy()
    nodeDF = nodeDF[nodeDF["type"]==nodetype]
    nodelist = nodeDF['node'].to_list()
    #nodelist = nodeDF.index
    nodes_s = nx.draw_networkx_nodes(GG, poss, nodelist=nodelist, node_size=node_size, node_color=nodecolor, alpha=alpha)

def plot_TF_node2(GG, poss, nodeDF, nodetype="gene", nodecolor="#2E7EB7", node_size=20, alpha=0.4):
    #print(nodeDF).shape
    #nodeDF = nodeDF.copy()
    nodeDF = nodeDF[nodeDF["type"]==nodetype]
    nodelist = nodeDF['node'].to_list()
    #nodelist = nodeDF.index
    nodes_s = nx.draw_networkx_nodes(GG, poss, nodelist=nodelist, node_shape = 'h',
                                     node_size=node_size, node_color=nodecolor, alpha=alpha)

nodes_1 = plot_TF_node2(G_CoA, pos_CoA, nodes_CoA, nodetype="TF", nodecolor="#ED3833", node_size=30)
nodes_2 = plot_TF_node2(G_CoA, pos_CoA, nodes_CoA, nodetype="TR", nodecolor="#DBB53E", node_size=30)

nodes3 = plot_target_node2(G_CoA, pos_CoA, nodes_CoA, nodetype="gene", nodecolor="#2E7EB7", node_size=30)

nodes4 = plot_target_node2(G_CoA, pos_CoA, nodes_CoA, nodetype="Acetyl-CoA", nodecolor="#581642", node_size=500, alpha=1)

TF_genes = gene_TF_type2.loc[CoA_TFs["TF"].to_list(), "TF-Family2"].to_list()

TFs = nx.draw_networkx_nodes(G_CoA, pos_CoA, nodelist=TF_genes, node_shape = 'h',
                                     node_size=300, node_color='#ED3833', alpha=1)





nx.draw_networkx_labels(G_CoA, pos_CoA, labels={i:i for i in TF_genes},
                        font_size=8,
                        font_family="sans-serif",
                        font_color='k',
                        #bbox=bbox
                        )
nx.draw_networkx_labels(G_CoA, pos_CoA, labels={"Acetyl-CoA": "Acetyl-CoA"},
                        font_size=8,
                        font_family="sans-serif",
                        font_color='k',
                        #bbox=bbox
                        )

#cmap = sns.color_palette("ch:s=.25,rot=-.25", as_cmap=True)
#sns.color_palette("light:#5A9", as_cmap=True)
#plt.cm.Blues

edge_colors = CoA_TFs_modules['importance']

cmap = sns.color_palette("ch:s=.25,rot=-.25", as_cmap=True)
options = {
    "edge_color": edge_colors,
    "width": 1,
    "edge_cmap": cmap,
}

edges = nx.draw_networkx_edges(G_CoA, pos_CoA, **options)

ax = plt.gca()

ax.scatter(None,None, label='Acetyl-CoA', color='#581642')
ax.scatter(None,None, label='TF', color='#ED3833')
ax.scatter(None,None, label='TR', color='#DBB53E')
ax.scatter(None,None, label='gene', color='#2E7EB7')

#ax.legend(loc=5)
ax.legend(loc="right", title='', bbox_to_anchor=(1, 0.45), ncol=1)

ax.margins(0.15)
plt.axis("off")
plt.tight_layout()

#ax.set_axis_off()
#plt.show()
plt.savefig("Acetyl-CoA_related_gene_TF_regulon_module_importance_network.pdf")

../../_images/notebooks_GRN_TF_grn_network_59_0.png
[85]:
edge_colors
[85]:
12       73.238874
24       68.823340
105      61.024120
238      55.667554
323      53.798785
           ...
38449    15.234534
48596    13.095217
49595    12.906214
82303     8.633197
85580     8.322737
Name: importance, Length: 579, dtype: float64
[ ]:

[ ]:

[ ]:

[52]:
CoA_genes = pd.read_table("Acetyl-CoA_genes.txt", index_col=0)
CoA_genes.head(2)
[52]:
symbol
ID
Solyc02g094640.4 SlACS1
Solyc07g017860.3 SlACS2
[58]:
CoA_genes.index[CoA_genes.index.isin(grnboost['target'])]
[58]:
Index(['Solyc05g024160.3'], dtype='object', name='ID')
[59]:
grnboost[grnboost['target'].isin(CoA_genes.index)]
[59]:
TF target importance
15060 Solyc03g123430.4 Solyc05g024160.3 23.900486
28454 Solyc03g111260.3 Solyc05g024160.3 18.029049
33600 Solyc06g053960.3 Solyc05g024160.3 16.465147
36744 Solyc02g093590.3 Solyc05g024160.3 15.651089
43705 Solyc06g069430.3 Solyc05g024160.3 14.058262
... ... ... ...
4610196 Solyc05g052410.3 Solyc05g024160.3 0.000378
4631663 Solyc02g077590.1 Solyc05g024160.3 0.000202
4650205 Solyc02g067800.4 Solyc05g024160.3 0.000071
4651625 Solyc02g071220.3 Solyc05g024160.3 0.000062
4652281 Solyc10g054070.1 Solyc05g024160.3 0.000058

618 rows × 3 columns

[ ]:

[ ]:

regulon go function plot#

[161]:
SUC_regulon_GO = pd.read_csv("SUC_regulon_GO.csv")
SUC_regulon_GO['TF_regulon'] = gene_TF_type2.loc[SUC_regulon_GO['TF'], "TF-Family2"].to_list()
SUC_regulon_GO = SUC_regulon_GO.sort_values(['TF_regulon', 'pvalue'], ascending=True)
SUC_regulon_GO['-log(pvalue)'] = -np.log(SUC_regulon_GO['pvalue'])
[281]:
SUC_regulon_GO['TF_regulon'].unique()
[281]:
array(['SLAP2_41', 'SLC3H_5', 'SLTify_5'], dtype=object)
[279]:
TF = "SLbHLH_1"
ax = sns.barplot(data=SUC_regulon_GO.query('TF_regulon=="{0}"'.format(TF)).head(10),
            x="-log(pvalue)", y="Description", color="b", alpha=0.4
           )
ax.set_ylabel("")
ax.set_title(TF)

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-279-131f9ff0513a> in <module>
      1 TF = "SLbHLH_1"
----> 2 ax = sns.barplot(data=SUC_regulon_GO.query('TF_regulon=="{0}"'.format(TF)).head(10),
      3             x="-log(pvalue)", y="Description", color="b", alpha=0.4
      4            )
      5 ax.set_ylabel("")

~/anaconda3/lib/python3.8/site-packages/seaborn/_decorators.py in inner_f(*args, **kwargs)
     44             )
     45         kwargs.update({k: arg for k, arg in zip(sig.parameters, args)})
---> 46         return f(**kwargs)
     47     return inner_f
     48

~/anaconda3/lib/python3.8/site-packages/seaborn/categorical.py in barplot(x, y, hue, data, order, hue_order, estimator, ci, n_boot, units, seed, orient, color, palette, saturation, errcolor, errwidth, capsize, dodge, ax, **kwargs)
   3180 ):
   3181
-> 3182     plotter = _BarPlotter(x, y, hue, data, order, hue_order,
   3183                           estimator, ci, n_boot, units, seed,
   3184                           orient, color, palette, saturation,

~/anaconda3/lib/python3.8/site-packages/seaborn/categorical.py in __init__(self, x, y, hue, data, order, hue_order, estimator, ci, n_boot, units, seed, orient, color, palette, saturation, errcolor, errwidth, capsize, dodge)
   1584         self.establish_variables(x, y, hue, data, orient,
   1585                                  order, hue_order, units)
-> 1586         self.establish_colors(color, palette, saturation)
   1587         self.estimate_statistic(estimator, ci, n_boot, seed)
   1588

~/anaconda3/lib/python3.8/site-packages/seaborn/categorical.py in establish_colors(self, color, palette, saturation)
    317         # Determine the gray color to use for the lines framing the plot
    318         light_vals = [colorsys.rgb_to_hls(*c)[1] for c in rgb_colors]
--> 319         lum = min(light_vals) * .6
    320         gray = mpl.colors.rgb2hex((lum, lum, lum))
    321

ValueError: min() arg is an empty sequence

ATP module#

[285]:
atp_genes = """Solyc03g115110.4
Solyc04g007550.4
Solyc04g072047.1
Solyc05g008460.4
Solyc05g050500.1
Solyc05g052140.3
Solyc05g052150.4
Solyc06g005010.1
Solyc06g005020.3
Solyc06g009920.2
Solyc06g048430.4
Solyc06g063220.3
Solyc06g065990.1
Solyc06g066000.3
Solyc11g063573.1
Solyc11g072450.2
Solyc12g005060.1
Solyc12g006210.1
Solyc12g010870.3
Solyc12g055760.2
Solyc12g056830.1""".split("\n")
[322]:
ATP_TFs = grnboost.query('target=="ATP" and importance>=6')
ATP_TFs_modules = grnboost[grnboost["TF"].isin(ATP_TFs['TF'])].query('target!="ATP"')
ATP_TFs_modules = ATP_TFs_modules.query('importance>=2')

ATP_TFs_modules = ATP_TFs_modules[ATP_TFs_modules['target'].isin(atp_genes)]

print(ATP_TFs_modules.shape)
(12, 3)
[324]:
ATP_TFs_modules_genes = ATP_TFs_modules[['target']].drop_duplicates()['target'].to_list()
ATP_TFs_modules_genes.append('ATP')
ATP_TFs_modules_genes_EXP = adata.to_df().loc[:, ATP_TFs_modules_genes]

[325]:
p_atp_df = pd.concat([ATP_TFs_modules_genes_EXP.loc[p_df_meta1.index, :], p_df_meta1[["conditon_HAG"]]], axis=1).groupby(['conditon_HAG']).mean()
p_atp_df = p_atp_df.loc[p_df_meta1[['conditon_HAG']].drop_duplicates()['conditon_HAG'],:]
[331]:
import matplotlib.colors as colors

plt.rcParams["figure.autolayout"] = True

h = sns.clustermap(p_atp_df.T, figsize=(15, 2), cmap='vlag',
               dendrogram_ratio=0.1, col_cluster=False, row_cluster=False,
               cbar_pos=(1, 0.7, 0.01, 0.2), z_score=0,
               #col_colors=homo_metadata_31[['Class']],
               #norm=colors.CenteredNorm(vcenter=0, halfrange=None, clip=False)
              )
h.savefig("ATP_genes_exp.pdf")
/Users/yuanzan/anaconda3/lib/python3.8/site-packages/seaborn/axisgrid.py:65: UserWarning: Tight layout not applied. tight_layout cannot make axes width small enough to accommodate all axes decorations
  self.figure.savefig(*args, **kwargs)
../../_images/notebooks_GRN_TF_grn_network_78_1.png
[ ]:

[ ]: