import numpy as np
import pandas as pd
from cov_functions import *
import seaborn as sns
from scipy import stats
import matplotlib.pyplot as plt
from IPython.display import display, Markdown, Latex
# Load the cleaned session-1 survey export; every plot below reads from `df`.
df = pd.read_csv('../../data/session1/clean_dataset.csv')
# Summary statistics (only rendered when this runs as a notebook cell).
df.describe()
# Histogram (15 bins, no KDE overlay) of the `sr_age` column.
age_axes = sns.distplot(
    df.loc[:, ["sr_age"]],
    bins=15,
    color="blue",
    hist=True,
    kde=False,
)
g = age_axes.set_title('Age')
# Bar chart with one bar per distinct value of `sr_gender`.
# The single-column frame is squeezed down to a Series first.
a = np.squeeze(df.loc[:, ["sr_gender"]])
# Pass the vector as `x=` explicitly: seaborn 0.12+ reserve the first
# positional slot of countplot for `data`, so a positional Series would no
# longer be read as the x variable.
g = sns.countplot(x=a).set_title('Gender')
# Bar chart of answers to `q6_me_inf`, with the count written above each bar.
a = np.squeeze(df.loc[:, ["q6_me_inf"]])
# Pass the vector as `x=` explicitly: seaborn 0.12+ reserve the first
# positional slot of countplot for `data`.
g = sns.countplot(x=a).set_title('Were you infected?')
ax = plt.gca()
# NOTE(review): presumably the column has exactly two codes, in the order
# no / yes — confirm against the codebook before trusting these labels.
ax.set_xticklabels(["No", "Yes"])
for p in ax.patches:
    # Label sits 0.35 right of the bar's left edge and 3 units above its top.
    ax.annotate('{:.0f}'.format(p.get_height()), (p.get_x() + 0.35, p.get_height() + 3))
# Bar chart of answers to `q6_close_person_inf`, with the count above each bar.
a = np.squeeze(df.loc[:, ["q6_close_person_inf"]])
# Pass the vector as `x=` explicitly: seaborn 0.12+ reserve the first
# positional slot of countplot for `data`.
g = sns.countplot(x=a).set_title('Was a close person to you infected?')
ax = plt.gca()
# NOTE(review): presumably the column has exactly two codes, in the order
# no / yes — confirm against the codebook before trusting these labels.
ax.set_xticklabels(["No", "Yes"])
for p in ax.patches:
    # Label sits 0.35 right of the bar's left edge and 3 units above its top.
    ax.annotate('{:.0f}'.format(p.get_height()), (p.get_x() + 0.35, p.get_height() + 3))
# Bar chart of answers to `q6_close_person_died`, with the count above each bar.
a = np.squeeze(df.loc[:, ["q6_close_person_died"]])
# Pass the vector as `x=` explicitly: seaborn 0.12+ reserve the first
# positional slot of countplot for `data`.
g = sns.countplot(x=a).set_title('Did a close person die/get very serious?')
ax = plt.gca()
# NOTE(review): presumably the column has exactly two codes, in the order
# no / yes — confirm against the codebook before trusting these labels.
ax.set_xticklabels(["No", "Yes"])
for p in ax.patches:
    # Label sits 0.35 right of the bar's left edge and 3 units above its top.
    ax.annotate('{:.0f}'.format(p.get_height()), (p.get_x() + 0.35, p.get_height() + 3))
# One histogram per column in the label slice q6_econ_impact_me ..
# q6_risk_group_closep, laid out on a 2x3 grid of axes.
a = df.loc[:, "q6_econ_impact_me":"q6_risk_group_closep"]
# Panel titles, in the same order as the selected columns.
s_str = ["Economic impact me", "Econ impact close person", "Currently work from home", "Apply social distancing", "I belong to risk group", "Close person belongs to risk group"]
a  # notebook display of the selected columns; no effect when run as a script
fig, ax = plt.subplots(2, 3, figsize=(20, 10))
for idx, (col, sf) in enumerate(zip(a, ax.flatten())):
    # 7 bins over the range 1..7, so each integer answer gets its own bar.
    g = sns.distplot(a[col], ax=sf, bins=7, color="darkblue", hist=True, kde=False,
                     hist_kws={'range': (1, 7), 'edgecolor': 'black', 'alpha': 0.8})
    # Only the two ends of the scale are labelled.
    sf.set_xticks([1, 7])
    sf.set_xticklabels(["Strongly disagree", "Strongly Agree"])
    sf.set_xlabel("")
    # The previous g.set_title("Objective measures") was dropped: distplot
    # returns the axes passed as `ax`, so this call overwrote it every time.
    sf.set_title(s_str[idx])
# Histogram of the `q6_houshold_membs` column (column name spelled as in the data).
a = df.loc[:, "q6_houshold_membs"]
s_str = ["Household members"]
# 5 bins over the range 1..5; bars get a black outline and 0.8 opacity.
household_hist_kws = {'range': (1, 5), 'edgecolor': 'black', 'alpha': 0.8}
g = sns.distplot(
    a,
    bins=5,
    color="darkblue",
    hist=True,
    kde=False,
    hist_kws=household_hist_kws,
)
# `fig` is bound to the current Axes here (not a Figure).
fig = plt.gca()
# Label only the ends of the axis; the top end is shown as "5+".
fig.set_xticks([1, 5])
fig.set_xticklabels(["1", "5+"])
fig.set_xlabel("Household members")
# Bar chart with one bar per distinct answer in `q6_media_freq`.
a = df.loc[:, "q6_media_freq"]
# Pass the vector as `x=` explicitly: seaborn 0.12+ reserve the first
# positional slot of countplot for `data`.
g = sns.countplot(x=a, color="darkblue").set_title("How often do you follow the COVID-related news?")
# `fig` is bound to the current Axes here (not a Figure).
fig = plt.gca()
# Move the ticks half a category to the left before rotating the labels.
# NOTE(review): hard-codes five tick positions — confirm the column has
# exactly five answer categories.
fig.set_xticks(np.arange(5) - 0.5)
fig.set_xticklabels(fig.get_xticklabels(), rotation=45)
# Histogram of the `q6_media_valence` column.
a = df.loc[:, "q6_media_valence"]
# 7 unit-wide bins covering the range -3..4.
valence_hist_kws = {'range': (-3, 4), 'edgecolor': 'black', 'alpha': 0.8}
g = sns.distplot(
    a,
    bins=7,
    color="darkblue",
    hist=True,
    kde=False,
    hist_kws=valence_hist_kws,
)
# `fig` is bound to the current Axes here (not a Figure).
fig = plt.gca()
# One tick per integer from -3 to 3.
fig.set_xticks(np.arange(-3, 4))
fig.set_xlabel("Consumed media valence")
# One histogram per column in the label slice q7_worry_infected ..
# q7_vir_made_lab, laid out on a 5x3 grid of axes.
a = df.loc[:, "q7_worry_infected":"q7_vir_made_lab"]
# Panel titles, in the same order as the selected columns.
s_str = ["Worry I will get infected", "Worry I will die", "Worry about economic impact on me", "Worry something bad will happen to me", "Worry there won't be sufficient help", "Worry close person will get infected", "Worry close person will die/get serious", "Worry about shortages", "WE are in a period of danger", "We are in a period of safety", "I was surprised when pandemic broke out", "I was very scared initially", "People overreact to it", "Virus is not as dangerous", "Virus is made in lab"]
a  # notebook display of the selected columns; no effect when run as a script
fig, ax = plt.subplots(5, 3, figsize=(20, 15))
fig.subplots_adjust(wspace=0.3, hspace=0.3)
for idx, (col, sf) in enumerate(zip(a, ax.flatten())):
    # 7 bins over the range 1..7, so each integer answer gets its own bar.
    g = sns.distplot(a[col], ax=sf, bins=7, color="darkblue", hist=True, kde=False,
                     hist_kws={'range': (1, 7), 'edgecolor': 'black', 'alpha': 0.4})
    # Only the two ends of the scale are labelled.
    sf.set_xticks([1, 7])
    sf.set_xticklabels(["Strongly disagree", "Strongly Agree"])
    sf.set_xlabel("")
    # The previous g.set_title("Objective measures") was dropped: distplot
    # returns the axes passed as `ax`, so this call overwrote it every time.
    sf.set_title(s_str[idx])
# Bar chart of `q7_inf_worry_frequency`.
# Answers are mapped to short labels with "(a)".."(d)" prefixes so that the
# alphabetical sort below puts them in ordinal order before plotting.
a = df.loc[:, "q7_inf_worry_frequency"].replace({
    "Nearly every day (more than half the days)": "(d) Nearly all days",
    "On one or several days": "(b) One or several",
    "Never": "(a) Never",
    # Label typo fixed ("hald" -> "half"); the sort order is unaffected.
    "On about half the days": "(c) On about half the days",
})
a = a.sort_values()
# Pass the vector as `x=` explicitly: seaborn 0.12+ reserve the first
# positional slot of countplot for `data`.
g = sns.countplot(x=a, color="darkblue").set_title("Worried about getting infected (out of past 2 weeks)")
# `fig` is bound to the current Axes here (not a Figure).
fig = plt.gca()
fig.set_xticklabels(fig.get_xticklabels(), rotation=45)
# Bar chart of `q7_diff_beh_freq`.
# Answers are mapped to short labels with "(a)".."(d)" prefixes so that the
# alphabetical sort below puts them in ordinal order before plotting.
a = df.loc[:, "q7_diff_beh_freq"].replace({
    "Nearly every day (more than half the days)": "(d) Nearly all days",
    "On one or several days": "(b) One or several",
    "Never": "(a) Never",
    # Label typo fixed ("hald" -> "half"); the sort order is unaffected.
    "On about half the days": "(c) On about half the days",
})
a = a.sort_values()
# Pass the vector as `x=` explicitly: seaborn 0.12+ reserve the first
# positional slot of countplot for `data`.
g = sns.countplot(x=a, color="darkblue").set_title("Behaved differently (out of past 2 weeks)")
# `fig` is bound to the current Axes here (not a Figure).
fig = plt.gca()
fig.set_xticklabels(fig.get_xticklabels(), rotation=45)
# One histogram per column in the label slice q7_beh_wash_hands ..
# q7_anx_another_beh, laid out on a 3x3 grid of axes.
a = df.loc[:, "q7_beh_wash_hands":"q7_anx_another_beh"]
# Panel titles, in the same order as the selected columns.
s_str = ["Meticulously wash hands", "Avoid people", "Avoid public places",
         "Avoid touching surfaces outside of my house", "Avoid standing close to ppl",
         "Avoid eating food prepared by others", "Avoid using public transport",
         "Avoid visiting doctor", "Avoid other behaviours"]
a  # notebook display of the selected columns; no effect when run as a script
fig, ax = plt.subplots(3, 3, figsize=(20, 15))
fig.subplots_adjust(wspace=0.4, hspace=0.3)
for idx, (col, sf) in enumerate(zip(a, ax.flatten())):
    # 7 bins over the range 1..7, so each integer answer gets its own bar.
    g = sns.distplot(a[col], ax=sf, bins=7, color="darkblue", hist=True, kde=False,
                     hist_kws={'range': (1, 7), 'edgecolor': 'black', 'alpha': 0.4})
    # Only the two ends of the scale are labelled.
    sf.set_xticks([1, 7])
    sf.set_xticklabels(["Strongly disagree", "Strongly Agree"])
    sf.set_xlabel("")
    # The previous g.set_title("Objective measures") was dropped: distplot
    # returns the axes passed as `ax`, so this call overwrote it every time.
    sf.set_title(s_str[idx])
#### COVID-related predictions / estimates
# One histogram per column in the label slice q8_prob_inf_me ..
# q8_prob_inf_avgp, laid out on a 2x3 grid of axes.
a = df.loc[:, "q8_prob_inf_me":"q8_prob_inf_avgp"]
# Panel titles, in the same order as the selected columns.
s_str = ["Prob: I get infected", "Prob: I die", "Prob: Severe economic impact on me",
         "Prob: Close person infected", "Prob: Close person die", "Prob: Average person infected"]
fig, ax = plt.subplots(2, 3, figsize=(20, 15))
fig.subplots_adjust(wspace=0.4, hspace=0.3)
for idx, (col, sf) in enumerate(zip(a, ax.flatten())):
    # NOTE(review): astype(int) raises on missing values, and the histogram
    # range (1, 100) leaves out any answer of 0 — confirm both are intended.
    g = sns.distplot(a[col].astype(int), ax=sf, bins=10, color="pink", hist=True, kde=False,
                     hist_kws={'range': (1, 100), 'edgecolor': 'black', 'alpha': 0.7})
    sf.set_xlabel("")
    # The previous g.set_title("Probability estimates") was dropped: distplot
    # returns the axes passed as `ax`, so this call overwrote it every time.
    sf.set_title(s_str[idx])
from datetime import datetime, timedelta
from collections import OrderedDict
# Bar chart of the share of respondents per distinct `q8_t_pand_end` value.
#
# This used to build a list of "YYYY-MM" strings and pass it as the first
# positional argument of value_counts. That parameter is `normalize`, so the
# non-empty list only acted as a truthy flag. The same result is now requested
# explicitly and the unused month list is gone.
# NOTE(review): bar heights are proportions, not raw counts — switch to
# normalize=False if counts were intended.
un = df.loc[:, "q8_t_pand_end"].value_counts(normalize=True, sort=True)
# Order the bars by their index value rather than by frequency.
un = un.sort_index()
plt.figure(figsize=(16, 6))
# `fig` ends up bound to the bar container returned by plt.bar, as before.
fig = plt.bar(x=un.index, height=un, color="darkblue")
plt.xticks(rotation=70)
ax = plt.gca()
ax.set_title("Expected end of pandemic")
# Bar chart of the share of respondents per distinct `q8_t_life_back_norm` value.
#
# This used to build a list of "YYYY-MM" strings and pass it as the first
# positional argument of value_counts. That parameter is `normalize`, so the
# non-empty list only acted as a truthy flag. The same result is now requested
# explicitly and the unused month list is gone.
# NOTE(review): bar heights are proportions, not raw counts — switch to
# normalize=False if counts were intended.
un = df.loc[:, "q8_t_life_back_norm"].value_counts(normalize=True, sort=True)
# Order the bars by their index value rather than by frequency.
un = un.sort_index()
plt.figure(figsize=(16, 6))
# `fig` ends up bound to the bar container returned by plt.bar, as before.
fig = plt.bar(x=un.index, height=un, color="darkblue")
plt.xticks(rotation=70)
ax = plt.gca()
ax.set_title("When will life come back to normal?")
# Bar chart of answers to `q8_secondw`, with the count written above each bar.
a = np.squeeze(df.loc[:, ["q8_secondw"]])
# Pass the vector as `x=` explicitly: seaborn 0.12+ reserve the first
# positional slot of countplot for `data`.
g = sns.countplot(x=a).set_title('Will there be a second wave?')
ax = plt.gca()
# NOTE(review): presumably the column has exactly two codes, in the order
# no / yes — confirm against the codebook before trusting these labels.
ax.set_xticklabels(["No", "Yes"])
for p in ax.patches:
    # Label sits 0.35 right of the bar's left edge and 3 units above its top.
    ax.annotate('{:.0f}'.format(p.get_height()), (p.get_x() + 0.35, p.get_height() + 3))
# Bar chart of the share of respondents per distinct `q8_t_secondw_when` value.
#
# This used to build a list of "YYYY-MM" strings and pass it as the first
# positional argument of value_counts. That parameter is `normalize`, so the
# non-empty list only acted as a truthy flag. The same result is now requested
# explicitly and the unused month list is gone.
# NOTE(review): bar heights are proportions, not raw counts — switch to
# normalize=False if counts were intended.
un = df.loc[:, "q8_t_secondw_when"].value_counts(normalize=True, sort=True)
# Order the bars by their index value rather than by frequency.
un = un.sort_index()
plt.figure(figsize=(16, 6))
# `fig` ends up bound to the bar container returned by plt.bar, as before.
fig = plt.bar(x=un.index, height=un, color="darkblue")
plt.xticks(rotation=70)
ax = plt.gca()
ax.set_title("If yes, when will the second wave come?")
# Bar chart of the share of respondents per distinct `q8_t_econ_back_norm` value.
#
# This used to build a list of "YYYY-MM" strings and pass it as the first
# positional argument of value_counts. That parameter is `normalize`, so the
# non-empty list only acted as a truthy flag. The same result is now requested
# explicitly and the unused month list is gone.
# NOTE(review): bar heights are proportions, not raw counts — switch to
# normalize=False if counts were intended.
un = df.loc[:, "q8_t_econ_back_norm"].value_counts(normalize=True, sort=True)
# Order the bars by their index value rather than by frequency.
un = un.sort_index()
plt.figure(figsize=(16, 6))
# `fig` ends up bound to the bar container returned by plt.bar, as before.
fig = plt.bar(x=un.index, height=un, color="darkblue")
plt.xticks(rotation=70)
ax = plt.gca()
ax.set_title("When will the economy come back to normal?")
import seaborn as sns
import matplotlib.pyplot as plt
# Distribution of the `q7_vir_made_lab` item on its own (distplot defaults).
ax = sns.distplot(df["q7_vir_made_lab"])
# Frequency table of the `q7_vir_not_as_dangerous` item.
print(df["q7_vir_not_as_dangerous"].value_counts())
# Joint KDE of the two items: first entry on x, second on y.
q_c = ["q7_vir_made_lab", "q7_vir_not_as_dangerous"]
x_item, y_item = q_c
g = sns.jointplot(x=x_item, y=y_item, data=df, kind="kde")
# Integer ticks 1..7 on both axes of the joint panel.
scale_ticks = np.arange(1, 8)
g.ax_joint.set_xticks(scale_ticks)
g.ax_joint.set_yticks(scale_ticks)
# Pairwise scatter/regression matrix across the questionnaire score columns.
keep_vars = [
    "stai_ta", "stai_sa",
    "sticsa_ta", "sticsa_cog_ta", "sticsa_som_ta",
    "sticsa_sa", "sticsa_cog_sa", "sticsa_som_sa",
    "bdi", "cat",
]
# Keep only those columns that actually exist in `df`.
present_cols = df.columns.intersection(keep_vars)
voi = df.loc[:, present_cols]
# Lower triangle only, KDE on the diagonal, regression fits off-diagonal.
g = sns.pairplot(voi, corner=True, diag_kind="kde", kind="reg")
# `corrfunc` is not defined in this file — presumably it comes from the
# `cov_functions` star import; it is applied to every lower-triangle panel.
g.map_lower(corrfunc)
# For each questionnaire score, draw a pairplot of that score together with
# the COVID summary variables listed in `key_vars`.
key_vars = [
    "covid_worry",
    "covid_avoidance_beh",
    "covid_spec_anxiety",
    "covid_prob_estimates",
    "covid_end_est",
]
# Display names and column names of the questionnaires, in matching order.
qnames = [
    "STAI-TRAIT", "STAI-STATE",
    "STICSA-TRAIT", "STICSA-TRAIT-COGNITIVE", "STICSA-TRAIT-SOMATIC",
    "STICSA-STATE", "STICSA-STATE-COGNITIVE", "STICSA-STATE-SOMATIC",
    "BDI", "Catastrophizing",
]
questionnaires = [
    "stai_ta", "stai_sa",
    "sticsa_ta", "sticsa_cog_ta", "sticsa_som_ta",
    "sticsa_sa", "sticsa_cog_sa", "sticsa_som_sa",
    "bdi", "cat",
]
for qname, qs in zip(qnames, questionnaires):
    # Markdown sub-heading above each figure (rendered in a notebook).
    display(Markdown("#### " + qname))
    keep_vars = np.append(key_vars, qs)
    # Keep only those columns that actually exist in `df`.
    present_cols = df.columns.intersection(keep_vars)
    voi = df.loc[:, present_cols]
    g = sns.pairplot(voi, corner=True, diag_kind="kde", kind="reg")
    # `corrfunc` is not defined in this file — presumably it comes from the
    # `cov_functions` star import.
    g.map_lower(corrfunc)
    # Leave room at the top of the figure for the overall title.
    plt.subplots_adjust(top=0.9)
    g.fig.suptitle(qname)
# Survey item columns, grouped by theme; `group_names` below labels each
# group in the same order.
var_groups = [
    ["q6_econ_impact_me", "q6_econ_impact_closep", "q6_work_home", "q6_apply_soc_dist",
     "q6_risk_group", "q6_risk_group_closep", "q6_houshold_membs", "q6_media_valence"],
    ["q7_worry_infected", "q7_worry_die", "q7_worry_econ_impact", "q7_worry_sthg_bad",
     "q7_worry_insuf_help", "q7_worry_closep_inf", "q7_closep_die", "q7_worry_shortage"],
    ["q7_period_rel_danger", "q7_period_rel_safety", "q7_initial_surprise", "q7_initial_scared",
     "q7_people_overreact", "q7_vir_not_as_dangerous", "q7_vir_made_lab"],
    ["q7_beh_wash_hands", "q7_beh_avoid_ppl", "q7_beh_avoid_public_places"],
    ["q7_anx_touching_surf", "q7_anx_stand_close_to_ppl", "q7_anx_eating_food_out",
     "q7_anx_public_transp", "q7_anx_visit_doc", "q7_anx_another_beh"],
    ["q8_prob_inf_me", "q8_prob_die_me", "q8_prob_econ_imp_me", "q8_prob_inf_closep",
     "q8_prob_die_closep", "q8_prob_inf_avgp"],
    ["q8_t_pand_end_days", "q8_t_life_back_norm_days", "q8_t_secondw_when_days",
     "q8_t_econ_back_norm_days"],
]
print(len(var_groups))
group_names = [
    "Objective measures",
    "Worries",
    "Worries&Attitudes",
    "Behaviours",
    "Anxieties",
    "Objective Probabilities",
    "Time Estimates",
]
# Display names and column names of the questionnaires, in matching order.
qnames = [
    "STAI-TRAIT", "STAI-STATE",
    "STICSA-TRAIT", "STICSA-TRAIT-COGNITIVE", "STICSA-TRAIT-SOMATIC",
    "STICSA-STATE", "STICSA-STATE-COGNITIVE", "STICSA-STATE-SOMATIC",
    "BDI", "Catastrophizing",
]
questionnaires = [
    "stai_ta", "stai_sa",
    "sticsa_ta", "sticsa_cog_ta", "sticsa_som_ta",
    "sticsa_sa", "sticsa_cog_sa", "sticsa_som_sa",
    "bdi", "cat",
]
# One pairplot per (questionnaire, item group) combination.
for qname, qs in zip(qnames, questionnaires):
    # Markdown heading per questionnaire (rendered in a notebook).
    display(Markdown("#### " + qname))
    for group_name, group_cols in zip(group_names, var_groups):
        display(Markdown("##### " + group_name))
        keep_vars = np.append(group_cols, qs)
        # Keep only those columns that actually exist in `df`.
        present_cols = df.columns.intersection(keep_vars)
        voi = df.loc[:, present_cols]
        g = sns.pairplot(voi, corner=True, diag_kind="kde", kind="reg")
        # `corrfunc` is not defined in this file — presumably it comes from
        # the `cov_functions` star import.
        g.map_lower(corrfunc)
        # Leave room at the top of the figure for the overall title.
        plt.subplots_adjust(top=0.9)
        g.fig.suptitle(group_name + ": " + qname)