Commit 736133b5 authored by Maike Kleemeyer's avatar Maike Kleemeyer
Browse files

Add helper comments

parent 0eb57d83
library(corrr)
# Binary questions
# Do you work in a group?
table(responses$A01, useNA = "ifany")
# Did you have a personal conversation with your supervisor?
table(responses$B14, useNA = "ifany")
# Have you been subjected to bullying during the past 12 months?
table(responses$D12, useNA = "ifany")
# Have you been subjected to gender discrimination during the past 12 months?
table(responses$E05, useNA = "ifany")
# Have you been subjected to sexual harassment during the past 12 months?
table(responses$F11, useNA = "ifany")
# Do children under the age of 18 live in your household?
table(responses$H07, useNA = "ifany")
# Do people with care needs live in your household?
df <- as.data.frame(table(responses$H30, useNA = "ifany"))
# Have you taken parental leave?
table(responses$H14, useNA = "ifany")
# Gender?
table(responses$I02, useNA = "ifany")
# Position?
table(responses$I03, useNA = "ifany")
# COMPUTE sum scores and means for A2, A3, B1, B2, C1, D1, D2, E1, E2, G1, G5, G7 # COMPUTE sum scores and means for A2, A3, B1, B2, C1, D1, D2, E1, E2, G1, G5, G7
analyses <- responses_coded %>%
mutate(sum_A = rowSums(select(.,A03:A05), na.rm = TRUE),
sum_B1 = rowSums(select(.,B02:B07), na.rm = TRUE),
sum_B2 = rowSums(select(.,B09:B14), na.rm = TRUE),
sum_C = rowSums(select(.,C02:C04), na.rm = TRUE),
sum_D = rowSums(select(.,D02:D06), na.rm = TRUE),
sum_E = rowSums(select(.,E03:E05), na.rm = TRUE),
sum_F = rowSums(select(.,F02:F08), na.rm = TRUE),
sum_H_worklife = rowSums(select(.,H02:H06), na.rm = TRUE),
sum_H_workfamily = rowSums(select(.,H09:H13), na.rm = TRUE),
sum_H_parentalleave_yes = rowSums(select(.,H17:H21), na.rm = TRUE),
sum_H_parentalleave_no = rowSums(select(.,H23:H25), na.rm = TRUE)
)
# Zeroes should be removed here, which appear if all items are not answered (=NA)
analyses <- analyses %>%
mutate(mean_A = rowMeans(select(.,A03:A05), na.rm = TRUE),
mean_B1 = rowMeans(select(.,B02:B07), na.rm = TRUE),
mean_B2 = rowMeans(select(.,B09:B14), na.rm = TRUE),
mean_C = rowMeans(select(.,C02:C04), na.rm = TRUE),
mean_D = rowMeans(select(.,D02:D06), na.rm = TRUE),
mean_E = rowMeans(select(.,E03:E05), na.rm = TRUE),
mean_F = rowMeans(select(.,F02:F08), na.rm = TRUE),
mean_H_worklife = rowMeans(select(.,H02:H06), na.rm = TRUE),
mean_H_workfamily = rowMeans(select(.,H09:H13), na.rm = TRUE),
mean_H_parentalleave_yes = rowMeans(select(.,H17:H21), na.rm = TRUE),
mean_H_parentalleave_no = rowMeans(select(.,H23:H25), na.rm = TRUE)
)
scale_A <- responses_coded %>%
select(A03:A05)
inter_item_A <- scale_A %>%
correlate() %>%
select(-term) %>%
colMeans(na.rm = TRUE)
mean(inter_item_A)
scale_A <- scale_A %>%
mutate(mean_A = rowMeans(., na.rm = TRUE))
item_total_A <- scale_A %>%
correlate() %>%
focus(mean_A)
item_total_A
mean(item_total_A$mean_A)
scale_A$mean_A <- NULL # delete the score column we made earlier
rely_A <- psych::alpha(scale_A, check.keys=FALSE)
psych::alpha(scale_A, check.keys=TRUE)$total$std.alpha
scale_B1 <- responses_coded %>%
select(B02:B07)
scale_B2 <- responses_coded %>%
select(B09:B14)
scale_C <- responses_coded %>%
select(C02:C04)
scale_D <- responses_coded %>%
select(D02:D06)
scale_E <- responses_coded %>%
select(E03:E05)
scale_F <- responses_coded %>%
select(F02:F08)
scale_H_worklife <- responses_coded %>%
select(H02:H06)
scale_H_workfamily <- responses_coded %>%
select(H09:H13)
scale_H_parentalleave_yes <- responses_coded %>%
select(H17:H21)
scale_H_parentalleave_no <- responses_coded %>%
select(H23:H25)
# Run Factor analysis on all likert scale items # Run Factor analysis on all likert scale items
## OLD R CODE ## OLD R CODE
setwd("~/Seafile/FragebogenMpg") setwd("~/Seafile/FragebogenMpg/Reanalysen_Feb2020")
file1 = read.csv("20200106_MPG Work Culture_Basic Data_likert.csv",header=TRUE,sep=";") file1 = read.csv("20200106_MPG Work Culture_Basic Data_likert.csv",header=TRUE,sep=";")
library(psych) library(psych)
......
---
title: "MPGQuestionnaireReport"
author: "Kleemeyer"
date: "11/4/2020"
output:
pdf_document: default
html_document: default
---
```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
library(ggplot2)
library(dplyr)
source("get_data_exported.R")
```
# Flow of participants
We received BETULA data from 6 waves.
--
* Wave 1 `r nrow(filter(dat, T1_test_wave == 1))` participants (`r nrow(filter(dat, T1_test_wave == 1 & sex == 1))` male).
* Wave 2 `r nrow(filter(dat, T2_test_wave == 2))` participants (`r nrow(filter(dat, T2_test_wave == 2 & sex == 1))` male).
* Wave 3 `r nrow(filter(dat, T3_test_wave == 3))` participants (`r nrow(filter(dat, T3_test_wave == 3 & sex == 1))` male).
* Wave 4 `r nrow(filter(dat, T4_test_wave == 4))` participants (`r nrow(filter(dat, T4_test_wave == 4 & sex == 1))` male).
* Wave 5 `r nrow(filter(dat, T5_test_wave == 5))` participants (`r nrow(filter(dat, T5_test_wave == 5 & sex == 1))` male).
* Wave 6 `r nrow(filter(dat, T6_test_wave == 6))` participants (`r nrow(filter(dat, T6_test_wave == 6 & sex == 1))` male).
```{r echo = FALSE}
data_waves <- data.frame(
waves = c("Total", "TP6", "TP5", "TP4", "TP3", "TP2", "TP1"),
wave1 = c( 1000, 221, 135, 141, 157, 193, 153),
wave2 = c( 2804, 0, 225, 155, 199, 788, 592),
wave2from1 = c( 845, 221, 135, 141, 156, 192, 0),
wave3 = c( 1998, 0, 0, 0, 0, 0, 1),
wave3from2 = c( 1353, 0, 225, 154, 196, 778, 0),
wave3from1 = c( 644, 221, 135, 138, 149, 1, 0),
wave4 = c( 1068, 0, 0, 0, 0, 0, 0),
wave4from2 = c( 572, 0, 225, 154, 185, 8, 0),
wave4from1 = c( 496, 221, 132, 137, 6, 0, 0),
wave5 = c( 1115, 0, 0, 0, 0, 64, 293),
wave5from2 = c( 395, 0, 225, 151, 17, 2, 0),
wave5from1 = c( 363, 221, 133, 6, 3, 0, 0),
wave6 = c( 522, 0, 0, 0, 0, 0, 0),
wave6from5 = c( 64, 0, 0, 0, 0, 64, 0),
wave6from2 = c( 231, 0, 225, 6, 0, 0, 0),
wave6from1 = c( 227, 5, 1, 0, 0, 0, 0),
)
df = data.frame(waves, wave1, wave2, wave2from1, wave3, wave3from2, wave3from1, wave4, wave4from2, wave4from1, wave5, wave5from2, wave5from1, wave6, wave6from5, wave6from2, wave6from1)
```
# Age Distributions
BETULA has 6 waves. Here is a quick look at the age distribution collapsed across all 6 waves:
```{r cars, echo=FALSE, warning=FALSE}
age <- c(dat$T1_age_T,dat$T2_age_T,dat$T3_age_T,dat$T4_age_T,dat$T5_age_T,dat$T6_age_T)
agedat <- data.frame(age)
pl1 <- ggplot(agedat,aes(x=1,y=age))+geom_jitter(alpha=.2)+geom_boxplot()+theme_minimal()
pl2 <- ggplot(agedat,aes(x=age))+geom_density()+theme_minimal()
gridExtra::grid.arrange(pl1,pl2, nrow=1)
```
# Sex Distribution
```{r}
ggplot(dat, aes(x=sex))+geom_bar()+theme_minimal()
```
# Dementia Status Distribution
```{r}
ggplot(dat, aes(x=sex))+geom_bar()+theme_minimal()
```
# Available variables
Crystalized intelligence
--
* SRB (v502) total number correct (out of 30) throughout all waves
* BAOTA (baotasum) sum correct answers (out of 26) exists from wave 4
Fluid intelligence
--
* block design (v311) total score throughout all waves
Episodic memory (recall)
--
* sptb (sum score verb and noun; max 16) throughout all waves
Perceptual speed
--
* letter digit (lettd_C; number of correct answers; max 125) exists from wave 3
# Distribution of variables seperated by waves
SRB (Crystalized intelligence)
--
```{r fig.width = 3.5, echo=FALSE, warning=FALSE}
hist(dat$T1_v502 [dat$T1_test_wave == 1],
xlim = c(0,30),
breaks = 10,
col = "red",
main = "wave 1",
xlab = "number correct")
hist(dat$T2_v502 [dat$T2_test_wave == 2],
xlim = c(0,30),
breaks = 10,
col = "orange",
main = "wave 2",
xlab = "number correct")
hist(dat$T3_v502 [dat$T3_test_wave == 3],
xlim = c(0,30),
breaks = 10,
col = "brown",
main = "wave 3",
xlab = "number correct")
hist(dat$T4_v502 [dat$T4_test_wave == 4],
xlim = c(0,30),
breaks = 10,
col = "blue",
main = "wave 4",
xlab = "number correct")
hist(dat$T5_v502 [dat$T5_test_wave == 5],
xlim = c(0,30),
breaks = 10,
col = "purple",
main = "wave 5",
xlab = "number correct")
hist(dat$T6_v502 [dat$T6_test_wave == 6],
xlim = c(0,30),
breaks = 10,
col = "green",
main = "wave 6",
xlab = "number correct")
```
Baota (Crystalized intelligence)
--
```{r fig.width = 3.5, echo=FALSE, warning=FALSE}
hist(dat$T4_baotasum [dat$T4_test_wave == 4],
xlim = c(0,25),
breaks = 10,
col = "blue",
main = "wave 4",
xlab = "number correct")
hist(dat$T5_baotasum [dat$T5_test_wave == 5],
xlim = c(0,25),
breaks = 10,
col = "purple",
main = "wave 5",
xlab = "number correct")
hist(dat$T6_baotasum [dat$T6_test_wave == 6],
xlim = c(0,25),
breaks = 10,
col = "green",
main = "wave 6",
xlab = "number correct")
```
Block design (Fluid intelligence)
--
```{r fig.width = 3.5, echo=FALSE, warning=FALSE}
hist(dat$T1_v311 [dat$T1_test_wave == 1],
xlim = c(0,51),
breaks = 10,
col = "red",
main = "wave 1",
xlab = "total score")
hist(dat$T2_v311 [dat$T2_test_wave == 2],
xlim = c(0,51),
breaks = 10,
col = "orange",
main = "wave 2",
xlab = "total score")
hist(dat$T3_v311 [dat$T3_test_wave == 3],
xlim = c(0,51),
breaks = 10,
col = "brown",
main = "wave 3",
xlab = "total score")
hist(dat$T4_v311 [dat$T4_test_wave == 4],
xlim = c(0,51),
breaks = 10,
col = "blue",
main = "wave 4",
xlab = "total score")
hist(dat$T5_v311 [dat$T5_test_wave == 5],
xlim = c(0,51),
breaks = 10,
col = "purple",
main = "wave 5",
xlab = "total score")
hist(dat$T6_v311 [dat$T6_test_wave == 6],
xlim = c(0,51),
breaks = 10,
col = "green",
main = "wave 6",
xlab = "total score")
```
Sptb (Episodic memory)
--
```{r fig.width = 3.5, echo=FALSE, warning=FALSE}
hist(dat$T1_sptb [dat$T1_test_wave == 1],
xlim = c(0,16),
breaks = 10,
col = "red",
main = "wave 1",
xlab = "sum score")
hist(dat$T2_sptb [dat$T2_test_wave == 2],
xlim = c(0,16),
breaks = 10,
col = "orange",
main = "wave 2",
xlab = "sum score")
hist(dat$T3_sptb [dat$T3_test_wave == 3],
xlim = c(0,16),
breaks = 10,
col = "brown",
main = "wave 1",
xlab = "sum score")
hist(dat$T4_sptb [dat$T4_test_wave == 4],
xlim = c(0,16),
breaks = 10,
col = "blue",
main = "wave 4",
xlab = "sum score")
hist(dat$T5_sptb [dat$T5_test_wave == 5],
xlim = c(0,16),
breaks = 10,
col = "purple",
main = "wave 5",
xlab = "sum score")
hist(dat$T6_sptb [dat$T6_test_wave == 6],
xlim = c(0,16),
breaks = 10,
col = "green",
main = "wave 6",
xlab = "sum score")
```
Letter digit (Perceptual speed)
--
```{r fig.width = 4, echo=FALSE, warning=FALSE}
hist(dat$T3_lettd_C [dat$T3_test_wave == 3],
xlim = c(0,55),
breaks = 10,
col = "brown",
main = "wave 3",
xlab = "number correct")
hist(dat$T4_lettd_C [dat$T4_test_wave == 4],
xlim = c(0,55),
breaks = 10,
col = "blue",
main = "wave 4",
xlab = "number correct")
hist(dat$T5_lettd_C [dat$T5_test_wave == 5],
xlim = c(0,55),
breaks = 10,
col = "purple",
main = "wave 5",
xlab = "number correct")
hist(dat$T6_lettd_C [dat$T6_test_wave == 6],
xlim = c(0,55),
breaks = 10,
col = "green",
main = "wave 6",
xlab = "number correct")
```
# Longitudinal change
```{r pressure, echo=FALSE, warning=FALSE}
age <- c(dat$T1_age_T,dat$T2_age_T,dat$T3_age_T,dat$T4_age_T,dat$T5_age_T,dat$T6_age_T)
# gather longitrudinal 'crystallized' SRB scores
srb <- c(dat$T1_v502,dat$T2_v502,dat$T3_v502,dat$T4_v502,dat$T5_v502,dat$T6_v502)
tp <- rep(1:6,each=nrow(dat))
id <- rep(1:nrow(dat),6)
# and plot them
longdat <- data.frame(srb,tp,id,age)
mns <- summarise(group_by(longdat, age),disp=mean(srb,na.rm=TRUE))
pl1 <- ggplot(data=longdat,aes(x=age,y=srb,group=id))+geom_line()+theme_minimal()+ggplot2::ggtitle("SRB (Crystallized)")+
geom_smooth(data=mns,aes(x=age,y=disp,group=1),method="lm")+
xlab("Age")+ylab("Score")
# those are not meaningful
#mean(dat$T1_v502,na.rm=TRUE)
#mean(dat$T2_v502,na.rm=TRUE)
#mean(dat$T3_v502,na.rm=TRUE)
#mean(dat$T4_v502,na.rm=TRUE)
#mean(dat$T5_v502,na.rm=TRUE)
#mean(dat$T6_v502,na.rm=TRUE)
# gather longitudinal 'crystallized' BAOTA scores
age <- c(dat$T4_age_T,dat$T5_age_T,dat$T6_age_T)
baota <- c(dat$T4_baotasum, dat$T5_baotasum, dat$T6_baotasum)
tp <- rep(4:6,each=nrow(dat))
id <- rep(1:nrow(dat),3)
longdat <- data.frame(baota,tp,id,age)
mns <- summarise(group_by(longdat, age),disp=mean(baota,na.rm=TRUE))
pl2 <- ggplot(data=longdat,aes(x=age,y=baota,group=id))+geom_line()+theme_minimal()+ggtitle("BAOTA (Crystallized)")+
geom_smooth(data=mns,aes(x=age,y=disp,group=1),method="lm")+
xlab("Age")+ylab("Score")
# fluid
age <- c(dat$T1_age_T,dat$T2_age_T,dat$T3_age_T,dat$T4_age_T,dat$T5_age_T,dat$T6_age_T)
fld <- c(dat$T1_v311,dat$T2_v311, dat$T3_v311, dat$T4_v311, dat$T5_v311, dat$T6_v311)
tp <- rep(1:6,each=nrow(dat))
id <- rep(1:nrow(dat),6)
longdat <- data.frame(fld,tp,id,age)
mns <- summarise(group_by(longdat, age),disp=mean(fld,na.rm=TRUE))
pl3 <- ggplot(data=longdat,aes(x=age,y=fld,group=id))+geom_line()+theme_minimal()+ ggtitle("Block Design (Fluid)")+
geom_smooth(data=mns,aes(x=age,y=disp,group=1),method="lm")+
xlab("Age")+ylab("Score")
# letter digit (speed)
age <- c(dat$T3_age_T,dat$T4_age_T,dat$T5_age_T,dat$T6_age_T)
speed <- c(dat$T3_lettd_C, dat$T4_lettd_C, dat$T5_lettd_C, dat$T6_lettd_C)
tp <- rep(3:6,each=nrow(dat))
id <- rep(1:nrow(dat),4)
longdat <- data.frame(speed,tp,id,age)
mns <- summarise(group_by(longdat, age),disp=mean(speed,na.rm=TRUE))
pl4 <- ggplot(data=longdat,aes(x=age,y=speed,group=id))+geom_line()+theme_minimal()+ ggtitle("Digit Letter (Speed)")+
geom_smooth(data=mns,aes(x=age,y=disp,group=1),method="lm")+
xlab("Age")+ylab("Score")
# sptb
age <- c(dat$T1_age_T,dat$T2_age_T,dat$T3_age_T,dat$T4_age_T,dat$T5_age_T,dat$T6_age_T)
sptb <- c(dat$T1_sptb, dat$T2_sptb, dat$T3_sptb, dat$T4_sptb, dat$T5_sptb, dat$T6_sptb)
tp <- rep(1:6,each=nrow(dat))
id <- rep(1:nrow(dat),6)
longdat <- data.frame(sptb,tp,id,age)
mns <- summarise(group_by(longdat, age),disp=mean(fld,na.rm=TRUE))
pl5 <- ggplot(data=longdat,aes(x=age,y=sptb,group=id))+geom_line()+theme_minimal()+ ggtitle("SPTB (Memory)")+
geom_smooth(data=mns,aes(x=age,y=disp,group=1),method="lm")+
xlab("Age")+ylab("Score")
gridExtra::grid.arrange(pl1,pl2,pl3,pl4,pl5, nrow=2)
```
...@@ -8,7 +8,7 @@ options(lime_password = 'Zfznye5XUJWQ') ...@@ -8,7 +8,7 @@ options(lime_password = 'Zfznye5XUJWQ')
get_session_key() get_session_key()
# include the survey ID (to be found on the LimeSurvey page) here # include the survey ID (to be found on the LimeSurvey page) here --> this only gets us complete data sets
responses <- get_responses(944399) responses <- get_responses(944399)
# recode data from text answers to numerical data we can work with # recode data from text answers to numerical data we can work with
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment