style 3

Contents

[5]:

import matplotlib.pyplot as plt
import networkx as nx
import pandas as pd
import numpy as np
import matplotlib as mpl
mpl.rcParams['pdf.fonttype']=42
mpl.rcParams['ps.fonttype']=42

import numpy as np
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
%matplotlib inline

import scanpy as sc
import anndata as ad


from scipy.cluster import hierarchy
from scipy.spatial import distance
from collections import defaultdict

[7]:

import os
from pathlib import Path

# Directory holding the GRN inputs. The default is the original author's
# absolute path; set the GRN_DATA_DIR environment variable to run the notebook
# on another machine without editing this cell.
GRN_DATA_DIR = Path(os.environ.get(
    "GRN_DATA_DIR",
    "/Users/yuanzan/Documents/github/seqyuan/tomato_graft_omics/data/grn/grn_notbook",
))

# Tab-separated edge table (despite the .csv suffix); later cells read the
# columns TF, target, importance, cc, TF_type and target_type from it.
grnboost = pd.read_table(GRN_DATA_DIR / "gene_energy_sugar_grnboost2_addtype.csv", sep="\t")
# AnnData object; later cells use adata.var (its 'type' column) to annotate graph nodes.
adata = ad.read_h5ad(GRN_DATA_DIR / "CC_DD_Exp_energy_sugar.h5ad")

[8]:

# Keep edges with importance >= 35. The explicit .copy() makes the result an
# independent frame, so adding a column below does not raise
# SettingWithCopyWarning.
grnboost_sub = grnboost.query('importance>=35').copy()

# Per-row total of 'cc' over all rows sharing the same TF. Selecting 'cc'
# before aggregating avoids summing (or, on pandas >= 2, concatenating) the
# string columns, and transform() returns the result already aligned to rows.
grnboost_sub['targetN'] = grnboost_sub.groupby('TF')['cc'].transform('sum')

# Drop regulators whose total is below 6. targetN is constant within a TF, so
# whole TF groups are removed and the surviving targetN values are unchanged;
# no recomputation is needed after the filter.
grnboost_sub = grnboost_sub.query('targetN>=6').copy()

grnboost_sub = grnboost_sub.sort_values(by=['targetN'])
#grnboost_sub['dupTarget'] = grnboost_sub.duplicated('target')

<ipython-input-8-96841fc8ce7a>:3: FutureWarning: The default value of numeric_only in DataFrameGroupBy.sum is deprecated. In a future version, numeric_only will default to False. Either specify numeric_only or select only columns which should be valid for the function.
  grnboost_sub['targetN'] = grnboost_sub.groupby(["TF"]).sum()['cc'][grnboost_sub['TF']].to_list()
<ipython-input-8-96841fc8ce7a>:3: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  grnboost_sub['targetN'] = grnboost_sub.groupby(["TF"]).sum()['cc'][grnboost_sub['TF']].to_list()
<ipython-input-8-96841fc8ce7a>:5: FutureWarning: The default value of numeric_only in DataFrameGroupBy.sum is deprecated. In a future version, numeric_only will default to False. Either specify numeric_only or select only columns which should be valid for the function.
  grnboost_sub['targetN'] = grnboost_sub.groupby(["TF"]).sum()['cc'][grnboost_sub['TF']].to_list()

[9]:

# Total 'cc' per TF. Selecting the column first means only 'cc' is aggregated,
# which silences the numeric_only FutureWarning and avoids summing string columns.
grnboost_sub.groupby("TF")["cc"].sum()

<ipython-input-9-2367075c054e>:1: FutureWarning: The default value of numeric_only in DataFrameGroupBy.sum is deprecated. In a future version, numeric_only will default to False. Either specify numeric_only or select only columns which should be valid for the function.
  grnboost_sub.groupby(["TF"]).sum()['cc']

[9]:

TF
2-Phospho-D-glycerate          10
ADP                           103
AMP                            13
ATP                           151
Acetyl-CoA                    151
Adenine                         9
Ara                            14
Beta-Leucine                   21
Citric-acid                   122
D-Erythrose-4-phosphate        16
D-Glucose-1-phosphate          19
D-Glucose-6-phosphate          10
D-Ribulose-5-phosphate          8
Dihydroxyacetone-phosphate     14
Fru                            25
Gal                            50
Glycerol-3-phosphate           11
L-Alanine                      50
L-Arginine                     19
L-Asparagine                   44
L-Citrulline                    9
L-Glutamate                    32
L-Lactate                       8
L-Threonine                    12
L-Tyrosine                    127
Lysine                        105
Phosphoenolpyruvic-acid        62
Phosphorylethanolamine        450
Pyruvic-acid                   11
Rha                            51
Sedoheptulose-7-phosphate       9
Suc                           790
Succinic-acid                  60
UDP-GlcNAc                     12
UMP                            11
Uracil                        160
cyclic-AMP                     52
Name: cc, dtype: int64

[18]:

# Numeric column totals for the 'Suc' regulator, split by target_type.
# numeric_only=True keeps the table limited to numeric columns on every pandas
# version (pandas >= 2 would otherwise also "sum" the string columns).
grnboost_sub.groupby(["TF", 'target_type']).sum(numeric_only=True).loc['Suc',:]

<ipython-input-18-f7e8530505ac>:1: FutureWarning: The default value of numeric_only in DataFrameGroupBy.sum is deprecated. In a future version, numeric_only will default to False. Either specify numeric_only or select only columns which should be valid for the function.
  grnboost_sub.groupby(["TF", 'target_type']).sum().loc['Suc',:]

[18]:

	importance	cc	targetN
target_type
TF	2452.356619	55	43450
TR	471.433371	11	8690
gene	31330.979607	724	571960

[21]:

# Write the filtered edge table as a tab-separated file without the index.
# NOTE(review): no TF filter is applied here, so the file contains every
# regulator in grnboost_sub even though its name starts with 'suc_' - confirm
# that this is intended.
grnboost_sub.to_csv('../../data/grn/suc_grnboost_importance35_targetN6.csv', sep="\t", index=False)

[24]:

# Export only the rows whose regulator is 'Suc' and whose target_type is
# 'TF' or 'TR', as a tab-separated file without the index.
grnboost_sub.query('TF=="Suc" and (target_type=="TF" or target_type=="TR")').to_csv('../../data/grn/suc_importance35_targetN6.csv', sep="\t", index=False)

[25]:

# Display the same selection that the previous cell wrote to disk:
# regulator 'Suc' with target_type 'TF' or 'TR'.
grnboost_sub.query('TF=="Suc" and (target_type=="TF" or target_type=="TR")')

[25]:

	TF	target	importance	cc	TF_type	target_type	targetN
2501	Suc	Solyc10g076460.2	35.962453	1	sugar	TF	790
412	Suc	Solyc05g052610.3	49.789240	1	sugar	TF	790
2480	Suc	Solyc04g078770.4	36.027338	1	sugar	TF	790
564	Suc	Solyc04g080740.1	47.305903	1	sugar	TF	790
474	Suc	Solyc04g009440.3	48.620021	1	sugar	TF	790
...	...	...	...	...	...	...	...
884	Suc	Solyc06g005680.4	44.026730	1	sugar	TF	790
983	Suc	Solyc02g092460.3	43.194171	1	sugar	TR	790
982	Suc	Solyc09g090130.3	43.203793	1	sugar	TF	790
975	Suc	Solyc01g109970.3	43.284457	1	sugar	TR	790
953	Suc	Solyc10g078720.2	43.410027	1	sugar	TF	790

66 rows × 7 columns

[ ]:

[ ]:

[10]:

# Unique node labels (regulators first, then targets) in order of first
# appearance. pd.unique keeps a reproducible order, whereas iterating a set of
# strings can differ between kernel sessions.
node_labels = pd.unique(pd.concat([grnboost_sub["TF"], grnboost_sub["target"]], axis=0))

# Per-node annotation rows taken from adata.var (the displayed column is 'type').
nodes = adata.var.loc[node_labels, :]
nodes.head(2)

[10]:

	type
Solyc06g082070.3	gene
Solyc06g076350.3	TF

[11]:

def subG(df):
    """Build an undirected graph from a regulator/target edge table.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide the columns "TF", "target" and "importance"; each row
        becomes one edge.

    Returns
    -------
    networkx.Graph
        Graph containing every distinct "TF" and "target" value as a node and
        one edge per row, with the row's importance stored under both the
        ``length`` and ``weight`` edge attributes.
    """
    G = nx.Graph()
    # Insert nodes in order of first appearance (TF column, then target column)
    # so the node order - and anything derived from it, such as the integer
    # relabelling done by nx.disjoint_union - is reproducible between runs.
    G.add_nodes_from(pd.unique(pd.concat([df["TF"], df["target"]], axis=0)))
    # Iterate the three columns directly instead of materialising a Series per
    # row with iterrows().
    for tf, target, importance in zip(df["TF"], df["target"], df["importance"]):
        G.add_edge(tf, target, length=importance, weight=importance)
    return G

[ ]:

[152]:

def create_hc(G):
    """Partition the nodes of ``G`` with complete-linkage hierarchical clustering.

    The pairwise distance between two nodes is their shortest-path length.
    Node labels are used directly as row/column indices of the distance matrix,
    so ``G`` must be labelled with the integers ``0 .. len(G) - 1``.

    Returns a list of lists; each inner list holds the node indices of one
    cluster.
    """
    n_nodes = len(G)
    hop_matrix = np.zeros((n_nodes, n_nodes))
    for source, reachable in nx.all_pairs_shortest_path_length(G):
        for target, hops in reachable.items():
            hop_matrix[source][target] = hops

    # Condense the square matrix and cluster with farthest-point linkage.
    condensed = distance.squareform(hop_matrix)
    linkage = hierarchy.complete(condensed)

    # The cut threshold is arbitrary and only meant for illustration.
    labels = list(hierarchy.fcluster(linkage, t=1.15))

    # Group node indices by their cluster label, keeping first-seen order.
    clusters = defaultdict(list)
    for node_index, cluster_id in enumerate(labels):
        clusters[cluster_id].append(node_index)
    return list(clusters.values())

[170]:

import matplotlib.colors as mcolors
# Names of the CSS4 named colours known to matplotlib.
# NOTE(review): `keys` is not referenced by any other cell visible in this
# notebook - confirm whether it is still needed.
keys = list(mcolors.CSS4_COLORS)

[174]:

#keys

[272]:

#### build graph ####
G = nx.Graph()

# Unique node labels (regulators, then targets) in order of first appearance,
# so that the 'range' numbering below is reproducible between kernel sessions
# (iterating a set of strings is not).
nodes = list(pd.unique(pd.concat([grnboost_sub["TF"], grnboost_sub["target"]], axis=0)))
# .copy() makes nodes_df independent of adata.var before a column is added.
nodes_df = adata.var.loc[nodes, :].copy()
# Position of each node in insertion order; later cells use it as the node id
# when drawing.
nodes_df['range'] = range(nodes_df.shape[0])

G.add_nodes_from(nodes_df.index)

# One edge per regulator/target row; importance is stored as both length and weight.
for i, row in grnboost_sub.iterrows():
    G.add_edge(row['TF'], row['target'], length=row['importance'], weight=row['importance'])

# U is the disjoint union of one subgraph per regulator. It has to start as an
# empty graph: without this initialisation the first loop iteration reads U
# before it exists and the cell fails with NameError on a fresh kernel.
U = nx.Graph()
for i in grnboost_sub["TF"].unique():
    subdf = grnboost_sub[grnboost_sub["TF"]==i]
    sG = subG(subdf)
    U = nx.disjoint_union(U, sG)

#### draw graph ####
plt.figure(1, figsize=(8, 8))

# Only the first connected component yielded by networkx is kept (not
# necessarily the largest one).
# NOTE(review): nodes_df['range'] equals the integer labels assigned below only
# if that component contains every node of G - confirm that G is connected.
H = G.subgraph(next(nx.connected_components(G)))
# Makes life easier to have consecutively labeled integer nodes
H = nx.convert_node_labels_to_integers(H)
# Create partitions with hierarchical clustering
partitions = create_hc(H)
# Build blockmodel graph (not used by the drawing call in this cell)
BM = nx.quotient_graph(H, partitions, relabel=True)

# Draw original graph with a force-directed layout
pos = nx.spring_layout(H, iterations=20, seed=83)  # Seed for reproducibility

nx.draw(H, pos, with_labels=False, node_size=10, edge_color="gainsboro", alpha=0.4)

../../_images/notebooks_network_to_network_16_0.png

[ ]:

[325]:

# Node positions computed by Graphviz 'neato' on U (requires pygraphviz).
# NOTE(review): pos is keyed by U's node labels, while the drawing calls below
# address nodes through nodes_df['range'] and through H - confirm that these
# three numberings actually refer to the same nodes.
pos = nx.nx_agraph.graphviz_layout(U, prog="neato")



# Figure number 1 with a 10x8 inch canvas; the drawing calls below render onto it.
plt.figure(1, figsize=(10, 8))

def plot_target_node(GG, poss, nodeDF, nodetype="gene", nodecolor="#2E7EB7", node_size=20, alpha=0.4):
    """Draw all nodes of one type with a uniform size and colour.

    Parameters
    ----------
    GG : networkx graph
        Graph passed to ``nx.draw_networkx_nodes``.
    poss : dict
        Node positions, indexed by the values stored in ``nodeDF['range']``.
    nodeDF : pandas.DataFrame
        Node table with a ``'type'`` column (used for filtering) and a
        ``'range'`` column (used as the node identifiers to draw).
    nodetype : str
        Value of ``nodeDF['type']`` selecting the nodes to draw.
    nodecolor, node_size, alpha
        Forwarded to ``nx.draw_networkx_nodes``.

    Returns
    -------
    The collection returned by ``nx.draw_networkx_nodes``. The previous
    version returned nothing, so callers that stored the result received None.
    """
    nodeDF = nodeDF[nodeDF["type"] == nodetype]
    nodelist = nodeDF['range'].to_list()
    # Use the graph and positions that were passed in rather than the
    # notebook-level globals G and pos, so the function does what its
    # signature says.
    return nx.draw_networkx_nodes(GG, poss, nodelist=nodelist, node_size=node_size,
                                  node_color=nodecolor, alpha=alpha)

# Nodes that never appear in the regulator ('TF') column of the edge table.
# The original cell read a global named grn_DF that no visible cell defines
# (NameError on a fresh kernel); grnboost_sub is the edge table passed as
# grn_DF to plot_TF_node, so it is used here as well.
is_regulator = nodes_df.index.isin(grnboost_sub['TF'])
target_only_df = nodes_df[~is_regulator]

# 'gene' and 'sugar' nodes are drawn only when they are not regulators;
# 'energy', 'TF' and 'TR' nodes are drawn from the full node table.
# 'TF' and 'TR' deliberately share one colour.
nodes1 = plot_target_node(H, pos, target_only_df, nodetype="gene", nodecolor="#2E7EB7", node_size=10)
nodes2 = plot_target_node(H, pos, target_only_df, nodetype="sugar", nodecolor="#581642", node_size=10)
nodes3 = plot_target_node(H, pos, nodes_df, nodetype="energy", nodecolor="#DBB53E", node_size=10)
nodes4 = plot_target_node(H, pos, nodes_df, nodetype="TF", nodecolor="#ED3833", node_size=10)
nodes5 = plot_target_node(H, pos, nodes_df, nodetype="TR", nodecolor="#ED3833", node_size=10)


def plot_TF_node(GG, poss, grn_DF, nodeDF, nodetype="sugar", nodecolor="red", node_size=20, alpha=0.4):
    """Draw all nodes of one type, scaling each marker by its regulator total.

    Parameters
    ----------
    GG : networkx graph
        Graph passed to ``nx.draw_networkx_nodes``.
    poss : dict
        Node positions, indexed by the values stored in ``nodeDF['range']``.
    grn_DF : pandas.DataFrame
        Edge table with columns ``'TF'`` and ``'cc'``; ``'cc'`` is summed per
        ``'TF'`` to obtain each regulator's total.
    nodeDF : pandas.DataFrame
        Node table indexed by node label, with ``'type'`` and ``'range'`` columns.
    nodetype : str
        Value of ``nodeDF['type']`` selecting the nodes to draw.
    nodecolor, alpha
        Forwarded to ``nx.draw_networkx_nodes``.
    node_size : number
        Base marker size; each node is drawn with ``node_size * total / 6``.

    Returns
    -------
    The collection returned by ``nx.draw_networkx_nodes``. The previous
    version returned nothing, so callers that stored the result received None.
    """
    nodeDF = nodeDF[nodeDF["type"] == nodetype]
    nodelist = nodeDF['range'].to_list()

    # Per-regulator total of 'cc', aligned to the selected nodes. Nodes that
    # never occur as a regulator get a total of 1. reindex(fill_value=1)
    # replaces the former DataFrame.append call, which no longer exists in
    # pandas >= 2.0.
    target_count = grn_DF.groupby("TF")["cc"].sum().reindex(nodeDF.index, fill_value=1)

    # NOTE(review): the divisor 6 looks like it mirrors the targetN>=6 filter
    # used when building the edge table - confirm.
    sizes = (node_size * target_count / 6).to_numpy()

    # Use the graph and positions that were passed in rather than the
    # notebook-level globals G and pos.
    return nx.draw_networkx_nodes(GG, poss, nodelist=nodelist, node_size=sizes,
                                  node_color=nodecolor,
                                  alpha=alpha)

# Draw the 'sugar' and 'energy' nodes again through plot_TF_node, which scales
# each marker by that node's summed 'cc'.
nodes6 = plot_TF_node(H, pos, grnboost_sub, nodes_df, nodetype="sugar", nodecolor="#581642", node_size=20)
nodes7 = plot_TF_node(H, pos, grnboost_sub, nodes_df, nodetype="energy", nodecolor="#DBB53E", node_size=20)

# Regulators with the highest targetN. Despite the variable name, head(10)
# keeps up to ten of them, not seven.
top7TFs = grnboost_sub[['TF', 'targetN']].sort_values(['targetN'], ascending=False).drop_duplicates().head(10)['TF'].to_list()
# Rebind the name to the matching rows of nodes_df (needed for their 'range' ids).
top7TFs = nodes_df.loc[top7TFs,:]

# Style of the box drawn behind each label.
bbox={'facecolor': 'w',  # fill colour
      'edgecolor': 'None',  # border colour
      'alpha': 0.5,  # box opacity
      'pad': 2,  # padding between the text and the box edge
     }

# Label only the selected regulators: key = 'range' id, value = node label.
nx.draw_networkx_labels(H, pos, labels={row['range']:i for i,row in top7TFs.iterrows()},
                        font_size=10,
                        font_family="sans-serif",
                        font_color='k',
                        bbox=bbox
                        )




# NOTE(review): edge_colors and cmap are not used by any later statement in
# this cell; the edges below are drawn in a single fixed colour.
edge_colors = grnboost_sub['importance'].to_list()
cmap = plt.cm.plasma

edges = nx.draw_networkx_edges(H, pos, edge_color="gainsboro", alpha=0.4)

ax = plt.gca()
# These scatter calls are given no real coordinates; they carry only a label
# and a colour, apparently so that ax.legend() shows one entry per node class.
ax.scatter(None,None, label='sugar', color='#581642')
ax.scatter(None,None, label='energy', color='#DBB53E')
ax.scatter(None,None, label='gene', color='#2E7EB7')
ax.scatter(None,None, label='TF/TR', color='#ED3833')
ax.legend(loc=5)
ax.margins(0.15)
plt.axis("off")
plt.tight_layout()


#ax.set_axis_off()
#plt.show()
# Save the current figure; the format is taken from the .pdf suffix.
plt.savefig("tomato_geneExp_energy_sugar_Comodule_network.pdf")

/tmp/ipykernel_1036807/2239920485.py:19: FutureWarning: The default value of numeric_only in DataFrameGroupBy.sum is deprecated. In a future version, numeric_only will default to False. Either specify numeric_only or select only columns which should be valid for the function.
  target_count = pd.DataFrame(grn_DF.groupby(["TF"]).sum()['cc'])
/tmp/ipykernel_1036807/2239920485.py:24: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
  target_count = target_count.append(notIntarget_count)
/tmp/ipykernel_1036807/2239920485.py:19: FutureWarning: The default value of numeric_only in DataFrameGroupBy.sum is deprecated. In a future version, numeric_only will default to False. Either specify numeric_only or select only columns which should be valid for the function.
  target_count = pd.DataFrame(grn_DF.groupby(["TF"]).sum()['cc'])
/tmp/ipykernel_1036807/2239920485.py:24: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
  target_count = target_count.append(notIntarget_count)

../../_images/notebooks_network_to_network_18_1.png

[355]:

style 3#

[ ]:

[465]:

# Summary table for the bar charts: total 'cc' per regulator, regulator type
# and target class, with the 'TF' and 'TR' target classes merged into 'TF/TR'.
# Everything is built as one chain on fresh frames, so nothing is assigned into
# the result of query() (the source of SettingWithCopyWarning) and only 'cc' is
# aggregated (no numeric_only FutureWarning).
grnboost_sub_bar = (
    grnboost.query('importance>=35')
    .assign(target_type=lambda edges: edges['target_type'].replace({'TF': 'TF/TR', 'TR': 'TF/TR'}))
    .groupby(["TF", 'TF_type', 'target_type'])['cc'].sum()
    .reset_index()
    .sort_values(by=['cc', 'target_type'], ascending=False)
    # Display names used as axis / legend titles by the plotting cells.
    .rename(columns={
        'TF': 'Sugar_Energy',
        'TF_type': 'Sugar_Energy_type',
        'target_type': 'Gene/metabolite class',
        'cc': 'Cooperative dynamic\nGene/metabolite number',
    })
)

/tmp/ipykernel_1036807/3719034143.py:4: FutureWarning: The default value of numeric_only in DataFrameGroupBy.sum is deprecated. In a future version, numeric_only will default to False. Either specify numeric_only or select only columns which should be valid for the function.
  grnboost_sub_bar = pd.DataFrame(grnboost_sub_bar.groupby(["TF", 'TF_type', 'target_type']).sum()['cc'])

states#

[467]:

# Broken-y-axis bar chart for the sugar regulators: the same bars are drawn on
# two stacked axes, and each axis shows a different slice of the y range.
fig, (ax1, ax2) = plt.subplots(2, 1, sharey=False, dpi=100, figsize=(5,4))
fig.subplots_adjust(hspace=0.1)

# One colour per hue level; the energy bar-chart cell reuses this list.
palette=['#2E7EB7', '#ED3833', '#581642', '#DBB53E']

sugar_bar = grnboost_sub_bar.query('Sugar_Energy_type=="sugar"')
# `color` expects a single colour and is not meant to be combined with a hue
# palette, so only `palette` is passed.
for panel_ax in (ax1, ax2):
    sns.barplot(ax=panel_ax, data=sugar_bar, palette=palette,
                x="Sugar_Energy", y="Cooperative dynamic\nGene/metabolite number", hue="Gene/metabolite class")

ax2.set_ylim(0, 80)     # lower panel: show only the low part of the y range
ax1.set_ylim(700, 750)  # upper panel: show only the high part of the y range

ax1.legend(loc=5)
ax2.margins(0.03)
ax1.margins(0.03)
ax1.set_xticks([])
ax1.set_xlabel('')
ax2.set_xlabel('Sugar')
ax1.set_ylabel('')
ax2.set_ylabel('')

# Keep a single legend (on the upper panel).
ax2.get_legend().remove()

ax1.spines['bottom'].set_visible(False)  # hide the bottom spine of the upper panel
ax2.spines['top'].set_visible(False)     # hide the top spine of the lower panel

# The shared y-axis description is shown as the title of the upper panel.
ax1.set_title('Cooperative dynamic\nGene/metabolite number')

fig.savefig('sugar__Cooperative_dynamic_Gene_metabolite_number.pdf')

../../_images/notebooks_network_to_network_24_0.png

[ ]:

[472]:

# Horizontal bar chart for the energy regulators, one bar per target class.
fig, ax = plt.subplots(1, 1, sharey=False, dpi=100, figsize=(5,8))

# `palette` is the colour list defined in the sugar bar-chart cell. `color`
# expects a single colour and is not meant to be combined with a hue palette,
# so only `palette` is passed.
ax = sns.barplot(ax=ax, data=grnboost_sub_bar.query('Sugar_Energy_type=="energy"'), palette=palette,
                 y="Sugar_Energy", x="Cooperative dynamic\nGene/metabolite number",
                 hue="Gene/metabolite class")

ax.legend(loc=8)
ax.margins(0.03)
ax.set_ylabel('Energy')
plt.tight_layout()
fig.savefig('energy__Cooperative_dynamic_Gene_metabolite_number.pdf')

../../_images/notebooks_network_to_network_26_0.png

[469]:

# Save the bar-chart summary table (comma-separated, no index column) to the
# current working directory.
grnboost_sub_bar.to_csv("sugar_energy_coDynamic_grnboost_module.csv", index=False)

[ ]:

[ ]: