Analyzing DOSPERT from Qualtrics (Note: only uses one response)
First, load the packages needed for the analyses and set the working directory.
library(XML)
library(plyr)
library(stringr)
library(reshape2)
# Machine-specific absolute path: change it to the folder that holds
# DOSPERT.xml, which is read by relative path further down.
setwd("C:/Users/World/Desktop/Joe/Research/CDS - Columbia/DOSPERT")
Next read the file in from Qualtrics.
# Parse the XML file into a data frame. stringsAsFactors is spelled out as
# FALSE (not the reassignable alias F) so text fields are not made factors.
dataQ <- xmlToDataFrame("DOSPERT.xml", stringsAsFactors = FALSE)
This is how the head of your columns should look after uploading the file.
# Preview the imported data frame; each survey item is its own column.
head(dataQ)
## ResponseID ResponseSet Name ExternalDataReference
## 1 R_3BMHpaFPDvFw9Wl Default Response Set Anonymous
## EmailAddress IPAddress Status StartDate
## 1 128.59.199.242 0 2014-05-27 15:25:02
## EndDate Finished RiskTaking_S1 RiskTaking_R2 RiskTaking_FG3
## 1 2014-05-27 15:27:12 1 2 3 3
## RiskTaking_FI4 RiskTaking_HS5 RiskTaking_E6 RiskTaking_S7 RiskTaking_FG8
## 1 2 3 4 5 5
## RiskTaking_E9 RiskTaking_E10 RiskTaking_R11 RiskTaking_FI12
## 1 4 3 2 4
## RiskTaking_R13 RiskTaking_FG14 RiskTaking_HS15 RiskTaking_E16
## 1 4 3 4 4
## RiskTaking_HS17 RiskTaking_FI18 RiskTaking_R19 RiskTaking_HS20
## 1 3 3 3 4
## RiskTaking_S21 RiskTaking_S22 RiskTaking_HS23 RiskTaking_R24
## 1 4 3 3 4
## RiskTaking_R25 RiskTaking_HS26 RiskTaking_S27 RiskTaking_S28
## 1 4 4 3 4
## RiskTaking_E29 RiskTaking_E30 RiskPerceptions_S1 RiskPerceptions_R2
## 1 3 6 2 2
## RiskPerceptions_FG3 RiskPerceptions_FI4 RiskPerceptions_HS5
## 1 3 3 3
## RiskPerceptions_E6 RiskPerceptions_S7 RiskPerceptions_FG8
## 1 3 2 5
## RiskPerceptions_E9 RiskPerceptions_E10 RiskPerceptions_R11
## 1 4 5 6
## RiskPerceptions_FI12 RiskPerceptions_R13 RiskPerceptions_FG14
## 1 6 5 4
## RiskPerceptions_HS15 RiskPerceptions_E16 RiskPerceptions_HS17
## 1 3 5 5
## RiskPerceptions_FI18 RiskPerceptions_R19 RiskPerceptions_HS20
## 1 4 3 5
## RiskPerceptions_S21 RiskPerceptions_S22 RiskPerceptions_HS23
## 1 5 6 7
## RiskPerceptions_R24 RiskPerceptions_R25 RiskPerceptions_HS26
## 1 7 2 7
## RiskPerceptions_S27 RiskPerceptions_S28 RiskPerceptions_E29
## 1 5 2 3
## RiskPerceptions_E30 ExpectedBenefits_S1 ExpectedBenefits_R2
## 1 3 1 2
## ExpectedBenefits_FG3 ExpectedBenefits_FI4 ExpectedBenefits_HS5
## 1 3 5 5
## ExpectedBenefits_E6 ExpectedBenefits_S7 ExpectedBenefits_FG8
## 1 3 3 4
## ExpectedBenefits_E9 ExpectedBenefits_E10 ExpectedBenefits_R11
## 1 5 6 7
## ExpectedBenefits_FI12 ExpectedBenefits_R13 ExpectedBenefits_FG14
## 1 3 3 5
## ExpectedBenefits_HS15 ExpectedBenefits_E16 ExpectedBenefits_HS17
## 1 5 4 3
## ExpectedBenefits_FI18 ExpectedBenefits_R19 ExpectedBenefits_HS20
## 1 3 5 5
## ExpectedBenefits_S21 ExpectedBenefits_S22 ExpectedBenefits_HS23
## 1 5 3 3
## ExpectedBenefits_R24 ExpectedBenefits_R25 ExpectedBenefits_HS26
## 1 5 5 4
## ExpectedBenefits_S27 ExpectedBenefits_S28 ExpectedBenefits_E29
## 1 3 4 5
## ExpectedBenefits_E30 LocationLatitude LocationLongitude LocationAccuracy
## 1 6 40.800598144531 -73.965301513672 -1
Remove the location columns and melt the data into long format (one row per response to each item).
# Drop the location columns by name rather than by position, so the step does
# not remove the wrong columns if the file has a different column layout.
# Skip this step if you need the location columns for further analysis.
dataQ <- dataQ[, !(names(dataQ) %in% c("LocationLatitude", "LocationLongitude",
  "LocationAccuracy")), drop = FALSE]
# Melt to long format: the respondent metadata columns are kept as identifiers
# and every remaining column becomes one variable/value row.
dataQm <- melt(dataQ, id.vars = c("ResponseID", "ResponseSet", "Name",
  "ExternalDataReference", "EmailAddress", "IPAddress", "Status", "StartDate",
  "EndDate", "Finished"))
This is how your data should look after being melted.
# Preview the melted data: the identifier columns repeat on every row and the
# item name and response appear in the variable and value columns.
head(dataQm)
## ResponseID ResponseSet Name ExternalDataReference
## 1 R_3BMHpaFPDvFw9Wl Default Response Set Anonymous
## 2 R_3BMHpaFPDvFw9Wl Default Response Set Anonymous
## 3 R_3BMHpaFPDvFw9Wl Default Response Set Anonymous
## 4 R_3BMHpaFPDvFw9Wl Default Response Set Anonymous
## 5 R_3BMHpaFPDvFw9Wl Default Response Set Anonymous
## 6 R_3BMHpaFPDvFw9Wl Default Response Set Anonymous
## EmailAddress IPAddress Status StartDate
## 1 128.59.199.242 0 2014-05-27 15:25:02
## 2 128.59.199.242 0 2014-05-27 15:25:02
## 3 128.59.199.242 0 2014-05-27 15:25:02
## 4 128.59.199.242 0 2014-05-27 15:25:02
## 5 128.59.199.242 0 2014-05-27 15:25:02
## 6 128.59.199.242 0 2014-05-27 15:25:02
## EndDate Finished variable value
## 1 2014-05-27 15:27:12 1 RiskTaking_S1 2
## 2 2014-05-27 15:27:12 1 RiskTaking_R2 3
## 3 2014-05-27 15:27:12 1 RiskTaking_FG3 3
## 4 2014-05-27 15:27:12 1 RiskTaking_FI4 2
## 5 2014-05-27 15:27:12 1 RiskTaking_HS5 3
## 6 2014-05-27 15:27:12 1 RiskTaking_E6 4
Split the variable column into 3 columns.
# Derive three columns from the item name (e.g. "RiskTaking_FG3").
# Scale: the scale prefix before the underscore.
dataQm$Scale <- str_extract(dataQm$variable,
  "(RiskTaking|RiskPerceptions|ExpectedBenefits)")
# Domain: the one- or two-letter code between the underscore and the number.
dataQm$Domain <- str_replace(dataQm$variable,
  "[A-Za-z]*_([A-Za-z]{1,2})\\d+", "\\1")
# Question: the item number, kept as a character string.
dataQm$Question <- str_extract(dataQm$variable, "\\d{1,2}")
This is how the new columns should look.
# Show the three derived columns, selected by name so the preview does not
# depend on their position in the data frame.
head(dataQm[, c("Scale", "Domain", "Question")])
## Scale Domain Question
## 1 RiskTaking S 1
## 2 RiskTaking R 2
## 3 RiskTaking FG 3
## 4 RiskTaking FI 4
## 5 RiskTaking HS 5
## 6 RiskTaking E 6
Next, calculate the sum of the responses in each domain for each subject.
# Convert the melted responses to numeric so they can be summed.
dataQm[["value"]] <- as.numeric(dataQm[["value"]])
# One row per respondent and domain.
# NOTE(review): Scale is not a grouping variable here, so each sum pools the
# RiskTaking, RiskPerceptions and ExpectedBenefits responses of a domain --
# confirm this is intended.
ddply(
  .data = dataQm,
  .variables = .(ResponseID, Domain),
  .fun = summarise,
  sum = round(sum(value))
)
## ResponseID Domain sum
## 1 R_3BMHpaFPDvFw9Wl E 76
## 2 R_3BMHpaFPDvFw9Wl FG 35
## 3 R_3BMHpaFPDvFw9Wl FI 33
## 4 R_3BMHpaFPDvFw9Wl HS 76
## 5 R_3BMHpaFPDvFw9Wl R 72
## 6 R_3BMHpaFPDvFw9Wl S 62
The data will be cast into wide format, with one column per scale, so that a linear regression can be run.
# Cast back to wide form: one row per respondent, domain and question, with
# the three scales spread into separate columns holding the responses.
dataQc <- dcast(
  data = dataQm,
  formula = ResponseID +
    ResponseSet +
    Name +
    ExternalDataReference +
    EmailAddress +
    IPAddress +
    Status +
    StartDate +
    EndDate +
    Finished +
    Domain +
    Question ~ Scale,
  value.var = "value"
)
This is how the last 5 columns of your cast data frame should look.
# Show the domain, question and scale columns, selected by name so the preview
# does not depend on how many identifier columns precede them.
head(dataQc[, c("Domain", "Question", "ExpectedBenefits", "RiskPerceptions",
  "RiskTaking")])
## Domain Question ExpectedBenefits RiskPerceptions RiskTaking
## 1 E 10 6 5 3
## 2 E 16 4 5 4
## 3 E 29 5 3 3
## 4 E 30 6 3 6
## 5 E 6 3 3 4
## 6 E 9 5 4 4
Next we will regress “Risk Taking” on “Risk Perceptions” and “Expected Benefits” within each domain, separately for each respondent, to estimate risk attitude.
# Fit a linear model of RiskTaking on RiskPerceptions and ExpectedBenefits.
#
# dataQc: data frame containing the columns RiskTaking, RiskPerceptions and
#         ExpectedBenefits.
# Returns the fitted lm object.
model <- function(dataQc) {
  fit <- lm(RiskTaking ~ RiskPerceptions + ExpectedBenefits, data = dataQc)
  fit
}
# Fit one model per domain and respondent; the result is a list of lm fits.
dataQc.lm <- dlply(
  .data = dataQc,
  .variables = .(Domain, ResponseID),
  .fun = model
)
Your regression results should look like these:
# Print the first fitted models in the list (one per domain for this respondent).
head(dataQc.lm)
## $E.R_3BMHpaFPDvFw9Wl
##
## Call:
## lm(formula = RiskTaking ~ RiskPerceptions + ExpectedBenefits,
## data = dataQc)
##
## Coefficients:
## (Intercept) RiskPerceptions ExpectedBenefits
## 4.747 -0.448 0.201
##
##
## $FG.R_3BMHpaFPDvFw9Wl
##
## Call:
## lm(formula = RiskTaking ~ RiskPerceptions + ExpectedBenefits,
## data = dataQc)
##
## Coefficients:
## (Intercept) RiskPerceptions ExpectedBenefits
## 1.000 1.333 -0.667
##
##
## $FI.R_3BMHpaFPDvFw9Wl
##
## Call:
## lm(formula = RiskTaking ~ RiskPerceptions + ExpectedBenefits,
## data = dataQc)
##
## Coefficients:
## (Intercept) RiskPerceptions ExpectedBenefits
## 1.75 0.50 -0.25
##
##
## $HS.R_3BMHpaFPDvFw9Wl
##
## Call:
## lm(formula = RiskTaking ~ RiskPerceptions + ExpectedBenefits,
## data = dataQc)
##
## Coefficients:
## (Intercept) RiskPerceptions ExpectedBenefits
## -0.00806 0.21774 0.58065
##
##
## $R.R_3BMHpaFPDvFw9Wl
##
## Call:
## lm(formula = RiskTaking ~ RiskPerceptions + ExpectedBenefits,
## data = dataQc)
##
## Coefficients:
## (Intercept) RiskPerceptions ExpectedBenefits
## 4.0817 0.0722 -0.2331
##
##
## $S.R_3BMHpaFPDvFw9Wl
##
## Call:
## lm(formula = RiskTaking ~ RiskPerceptions + ExpectedBenefits,
## data = dataQc)
##
## Coefficients:
## (Intercept) RiskPerceptions ExpectedBenefits
## 2.471 -0.277 0.645