sugar_corr_gene_exp_pre_4heatmap1

Contents

sugar_corr_gene_exp_pre_4heatmap1#

[2]:

import pandas as pd
# Load the first worksheet of the workbook; its first column becomes the row index.
gene_TF_raw = pd.read_excel("热图1.xlsx", sheet_name=0, index_col=0)
# Derive the working table as a new frame without the "ID" column, so that
# gene_TF_raw stays exactly as it was read from disk.
gene_TF = gene_TF_raw.drop(columns="ID")

/Users/yuanzan/anaconda3/lib/python3.8/site-packages/pandas/core/computation/expressions.py:20: UserWarning: Pandas requires version '2.7.3' or newer of 'numexpr' (version '2.7.1' currently installed).
  from pandas.core.computation.check import NUMEXPR_INSTALLED

[3]:

# One metadata row per column of gene_TF: the column label is stored in a
# "sample" column and is also used as the row index.
gene_TF_meta = (
    pd.DataFrame(gene_TF.columns)
    .set_axis(["sample"], axis=1)
    .set_index("sample", drop=False)
)
# "condition_HAG" is whatever precedes the first "h-" in the sample label.
gene_TF_meta['condition_HAG'] = (
    gene_TF_meta["sample"].str.split("h-", expand=True)[0]
)

import re
def get_condition(series):
    """Return the text that precedes the first run of digits in a row's label.

    Parameters
    ----------
    series : mapping (e.g. a DataFrame row)
        Must provide a string under the key ``'condition_HAG'``.

    Returns
    -------
    str
        Everything in ``series['condition_HAG']`` before its first digit run;
        the whole string when it contains no digits, and ``''`` when it starts
        with a digit.
    """
    # Raw string: "\d" in a normal string literal is an invalid escape sequence.
    return re.split(r"(\d+)", series['condition_HAG'])[0]

def get_HAG(series):
    """Return the first run of digits found in a row's label, as a string.

    Parameters
    ----------
    series : mapping (e.g. a DataFrame row)
        Must provide a string under the key ``'condition_HAG'``.

    Returns
    -------
    str
        The first consecutive digit run in ``series['condition_HAG']``.

    Raises
    ------
    IndexError
        If the label contains no digits at all.
    """
    # Raw string: "\d" in a normal string literal is an invalid escape sequence.
    # The capturing group makes re.split keep the digits as element 1.
    return re.split(r"(\d+)", series['condition_HAG'])[1]

# Split every "condition_HAG" label, row by row, into the text before its
# first digit run ("Condition") and that digit run itself ("HAG").
gene_TF_meta = gene_TF_meta.assign(
    Condition=lambda meta: meta.apply(get_condition, axis=1),
    HAG=lambda meta: meta.apply(get_HAG, axis=1),
)

# Keep only the derived columns; the sample label remains available as the index.
gene_TF_meta = gene_TF_meta.loc[:, ['condition_HAG', 'Condition', 'HAG']]



# Long form: one row per (gene, sample column) pair, with the row index
# exposed as an explicit "gene" column.
gene_TF = gene_TF.assign(gene=gene_TF.index).melt(id_vars=['gene'])
# Attach each sample's "condition_HAG" label by looking the sample up in the metadata.
gene_TF['condition_HAG'] = gene_TF_meta.loc[gene_TF['variable'], "condition_HAG"].to_list()
# Average the samples that share a (gene, condition_HAG) pair.
# "value" is selected BEFORE mean() so the string column "variable" is never
# aggregated: averaging it emits a FutureWarning on pandas 1.5 and raises
# TypeError once numeric_only defaults to False.
gene_TF = gene_TF.groupby(['gene', 'condition_HAG'])["value"].mean().reset_index()
# Back to wide form: genes as rows, condition_HAG labels as columns. Each cell
# holds a single averaged value, so aggfunc='max' simply passes it through.
gene_TF = pd.crosstab(gene_TF['gene'], gene_TF['condition_HAG'], aggfunc='max',values=gene_TF['value'].to_list())

<ipython-input-3-470d327d358b>:23: FutureWarning: The default value of numeric_only in DataFrameGroupBy.mean is deprecated. In a future version, numeric_only will default to False. Either specify numeric_only or select only columns which should be valid for the function.
  gene_TF = gene_TF.groupby(['gene', 'condition_HAG']).mean()["value"].reset_index()

[4]:

# One row per column of the averaged matrix, with the label in a "sample" column.
gene_TF_meta2 = pd.DataFrame(gene_TF.columns).set_axis(["sample"], axis=1)

def get_condition2(series):
    """Return the text that precedes the first run of digits in a row's sample label.

    Parameters
    ----------
    series : mapping (e.g. a DataFrame row)
        Must provide a string under the key ``'sample'``.

    Returns
    -------
    str
        Everything in ``series['sample']`` before its first digit run; the
        whole string when it contains no digits.
    """
    # Raw string: "\d" in a normal string literal is an invalid escape sequence.
    return re.split(r"(\d+)", series['sample'])[0]

def get_HAG2(series):
    """Return the first run of digits found in a row's sample label, as a string.

    Parameters
    ----------
    series : mapping (e.g. a DataFrame row)
        Must provide a string under the key ``'sample'``.

    Returns
    -------
    str
        The first consecutive digit run in ``series['sample']``.

    Raises
    ------
    IndexError
        If the label contains no digits at all.
    """
    # Raw string: "\d" in a normal string literal is an invalid escape sequence.
    # The capturing group makes re.split keep the digits as element 1.
    return re.split(r"(\d+)", series['sample'])[1]

# Derive "Condition" (text before the digits) and "HAG" (the digits, cast to
# int so they sort numerically rather than lexically), then order the rows
# by condition and, within a condition, by HAG.
gene_TF_meta2 = (
    gene_TF_meta2
    .assign(Condition=lambda meta: meta.apply(get_condition2, axis=1))
    .assign(HAG=lambda meta: meta.apply(get_HAG2, axis=1).astype(int))
    .sort_values(['Condition', 'HAG'])
)

# Rows follow the row order of the raw table; columns follow the sorted metadata.
gene_TF = gene_TF.loc[gene_TF_raw.index, gene_TF_meta2['sample']]

[5]:

# Write the reordered matrix and its column metadata to disk.
# Note: both files are tab-delimited even though they carry a .csv extension.
gene_TF.to_csv(path_or_buf='heatmap1.csv', sep="\t")
gene_TF_meta2.to_csv(path_or_buf='heatmap1_meta.csv', sep="\t")

Other code (not needed for the heatmap output)#

[7]:

# Transpose so that each row is a sample, expose the row label as a "sample"
# column, then melt every column except "Sucrose" and "sample" into long form.
corr_df = (
    gene_TF.T
    .assign(sample=lambda frame: frame.index)
    .melt(id_vars=['Sucrose', 'sample'])
)

# Metadata copy indexed by sample label (the "sample" column is kept) so that
# each long-form row can look up its Condition.
gene_TF_meta3 = gene_TF_meta2.set_index("sample", drop=False)
corr_df['Condition'] = gene_TF_meta3.loc[corr_df["sample"], "Condition"].to_list()

import numpy as np
# log2 of the melted values, shifted by 0.01 so that zeros do not map to -inf.
corr_df['logdata'] = np.log2(corr_df['value'].add(0.01))

import seaborn as sns

# Plot "logdata" against the "Sucrose" column with a regression fit, using one
# colour per Condition. The lowess=True option is left disabled.
sns.lmplot(
    data=corr_df,
    x="Sucrose",
    y="logdata",
    hue="Condition",
)

[7]:

<seaborn.axisgrid.FacetGrid at 0x7fc9e09a5340>

../../_images/notebooks_heatmap12_sugar_corr_gene_exp_pre_4heatmap1_6_1.png

[ ]: