Analyzing DOSPERT from Qualtrics (Note: only uses one response)

First, load the necessary packages for the analyses and set the working directory.

library(XML)
library(plyr)
library(stringr)
library(reshape2)
# Make the folder holding the Qualtrics export the working directory. The
# export is read below by its bare file name ("DOSPERT.xml"), so it must be
# located in this folder. Change the path to match your own machine.
setwd("C:/Users/World/Desktop/Joe/Research/CDS - Columbia/DOSPERT")

Next, read in the file exported from Qualtrics.

# Parse the Qualtrics XML export into a data frame. Text columns are kept as
# character vectors rather than factors. FALSE is spelled out because the
# shorthand F is an ordinary variable that can be reassigned.
dataQ <- xmlToDataFrame("DOSPERT.xml", stringsAsFactors = FALSE)

This is how the first rows of your data frame should look after loading the file.

# Preview the first rows (at most six) of the freshly loaded data.
head(dataQ, n = 6L)
##          ResponseID          ResponseSet      Name ExternalDataReference
## 1 R_3BMHpaFPDvFw9Wl Default Response Set Anonymous                      
##   EmailAddress      IPAddress Status           StartDate
## 1              128.59.199.242      0 2014-05-27 15:25:02
##               EndDate Finished RiskTaking_S1 RiskTaking_R2 RiskTaking_FG3
## 1 2014-05-27 15:27:12        1             2             3              3
##   RiskTaking_FI4 RiskTaking_HS5 RiskTaking_E6 RiskTaking_S7 RiskTaking_FG8
## 1              2              3             4             5              5
##   RiskTaking_E9 RiskTaking_E10 RiskTaking_R11 RiskTaking_FI12
## 1             4              3              2               4
##   RiskTaking_R13 RiskTaking_FG14 RiskTaking_HS15 RiskTaking_E16
## 1              4               3               4              4
##   RiskTaking_HS17 RiskTaking_FI18 RiskTaking_R19 RiskTaking_HS20
## 1               3               3              3               4
##   RiskTaking_S21 RiskTaking_S22 RiskTaking_HS23 RiskTaking_R24
## 1              4              3               3              4
##   RiskTaking_R25 RiskTaking_HS26 RiskTaking_S27 RiskTaking_S28
## 1              4               4              3              4
##   RiskTaking_E29 RiskTaking_E30 RiskPerceptions_S1 RiskPerceptions_R2
## 1              3              6                  2                  2
##   RiskPerceptions_FG3 RiskPerceptions_FI4 RiskPerceptions_HS5
## 1                   3                   3                   3
##   RiskPerceptions_E6 RiskPerceptions_S7 RiskPerceptions_FG8
## 1                  3                  2                   5
##   RiskPerceptions_E9 RiskPerceptions_E10 RiskPerceptions_R11
## 1                  4                   5                   6
##   RiskPerceptions_FI12 RiskPerceptions_R13 RiskPerceptions_FG14
## 1                    6                   5                    4
##   RiskPerceptions_HS15 RiskPerceptions_E16 RiskPerceptions_HS17
## 1                    3                   5                    5
##   RiskPerceptions_FI18 RiskPerceptions_R19 RiskPerceptions_HS20
## 1                    4                   3                    5
##   RiskPerceptions_S21 RiskPerceptions_S22 RiskPerceptions_HS23
## 1                   5                   6                    7
##   RiskPerceptions_R24 RiskPerceptions_R25 RiskPerceptions_HS26
## 1                   7                   2                    7
##   RiskPerceptions_S27 RiskPerceptions_S28 RiskPerceptions_E29
## 1                   5                   2                   3
##   RiskPerceptions_E30 ExpectedBenefits_S1 ExpectedBenefits_R2
## 1                   3                   1                   2
##   ExpectedBenefits_FG3 ExpectedBenefits_FI4 ExpectedBenefits_HS5
## 1                    3                    5                    5
##   ExpectedBenefits_E6 ExpectedBenefits_S7 ExpectedBenefits_FG8
## 1                   3                   3                    4
##   ExpectedBenefits_E9 ExpectedBenefits_E10 ExpectedBenefits_R11
## 1                   5                    6                    7
##   ExpectedBenefits_FI12 ExpectedBenefits_R13 ExpectedBenefits_FG14
## 1                     3                    3                     5
##   ExpectedBenefits_HS15 ExpectedBenefits_E16 ExpectedBenefits_HS17
## 1                     5                    4                     3
##   ExpectedBenefits_FI18 ExpectedBenefits_R19 ExpectedBenefits_HS20
## 1                     3                    5                     5
##   ExpectedBenefits_S21 ExpectedBenefits_S22 ExpectedBenefits_HS23
## 1                    5                    3                     3
##   ExpectedBenefits_R24 ExpectedBenefits_R25 ExpectedBenefits_HS26
## 1                    5                    5                     4
##   ExpectedBenefits_S27 ExpectedBenefits_S28 ExpectedBenefits_E29
## 1                    3                    4                    5
##   ExpectedBenefits_E30 LocationLatitude LocationLongitude LocationAccuracy
## 1                    6  40.800598144531  -73.965301513672               -1

Next, remove the location columns and melt the data into long format, with one row per item response.

# Drop the three Qualtrics location columns by name instead of by position
# (columns 101-103), so the right columns are removed even if the export has
# a different number or order of columns.
# Skip this step if you need the location columns for further analysis; in
# that case add them to `id_cols` below so that melt() does not stack them
# together with the item responses.
location_cols <- c("LocationLatitude", "LocationLongitude", "LocationAccuracy")
dataQ <- dataQ[, !(names(dataQ) %in% location_cols), drop = FALSE]

# Reshape to long format: the respondent-level columns listed in `id_cols`
# are repeated on every row, and every remaining column becomes one
# (variable, value) row. `id.vars` is written out in full instead of relying
# on partial matching of the abbreviated `id`.
id_cols <- c(
  "ResponseID", "ResponseSet", "Name", "ExternalDataReference",
  "EmailAddress", "IPAddress", "Status", "StartDate", "EndDate", "Finished"
)
dataQm <- melt(dataQ, id.vars = id_cols)

This is how your data should look after being melted.

# Preview the first rows (at most six) of the melted, long-format data.
head(dataQm, n = 6L)
##          ResponseID          ResponseSet      Name ExternalDataReference
## 1 R_3BMHpaFPDvFw9Wl Default Response Set Anonymous                      
## 2 R_3BMHpaFPDvFw9Wl Default Response Set Anonymous                      
## 3 R_3BMHpaFPDvFw9Wl Default Response Set Anonymous                      
## 4 R_3BMHpaFPDvFw9Wl Default Response Set Anonymous                      
## 5 R_3BMHpaFPDvFw9Wl Default Response Set Anonymous                      
## 6 R_3BMHpaFPDvFw9Wl Default Response Set Anonymous                      
##   EmailAddress      IPAddress Status           StartDate
## 1              128.59.199.242      0 2014-05-27 15:25:02
## 2              128.59.199.242      0 2014-05-27 15:25:02
## 3              128.59.199.242      0 2014-05-27 15:25:02
## 4              128.59.199.242      0 2014-05-27 15:25:02
## 5              128.59.199.242      0 2014-05-27 15:25:02
## 6              128.59.199.242      0 2014-05-27 15:25:02
##               EndDate Finished       variable value
## 1 2014-05-27 15:27:12        1  RiskTaking_S1     2
## 2 2014-05-27 15:27:12        1  RiskTaking_R2     3
## 3 2014-05-27 15:27:12        1 RiskTaking_FG3     3
## 4 2014-05-27 15:27:12        1 RiskTaking_FI4     2
## 5 2014-05-27 15:27:12        1 RiskTaking_HS5     3
## 6 2014-05-27 15:27:12        1  RiskTaking_E6     4

Split the variable column into 3 columns.

# Break item names such as "RiskTaking_FG3" into their three parts and append
# each part as a new column, in this order: Scale, Domain, Question.

# Scale: whichever of the three scale names appears in the item name.
dataQm$Scale <- str_extract(
  dataQm$variable,
  "(RiskTaking|RiskPerceptions|ExpectedBenefits)"
)

# Domain: the one- or two-letter code between the underscore and the digits.
dataQm$Domain <- str_replace(
  dataQm$variable,
  "[A-Za-z]*_([A-Za-z]{1,2})\\d+",
  "\\1"
)

# Question: the item number, kept as a character string.
dataQm$Question <- str_extract(dataQm$variable, "\\d{1,2}")

This is how the new columns should look.

# Preview the three derived columns. They are selected by name rather than by
# position (13:15), so the preview stays correct if the number of columns
# ahead of them ever changes.
head(dataQm[, c("Scale", "Domain", "Question")])
##        Scale Domain Question
## 1 RiskTaking      S        1
## 2 RiskTaking      R        2
## 3 RiskTaking     FG        3
## 4 RiskTaking     FI        4
## 5 RiskTaking     HS        5
## 6 RiskTaking      E        6

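Next, calculate the sum of the responses in each domain for each subject. Note that this sum pools the items from all three scales (risk taking, risk perceptions, and expected benefits) within a domain.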

# The melted responses must be numeric before they can be summed.
dataQm[["value"]] <- as.numeric(dataQm[["value"]])

# One row per respondent and domain, holding the rounded total of every
# response in that group (all three scales are summed together).
ddply(dataQm, c("ResponseID", "Domain"), function(grp) {
  data.frame(sum = round(sum(grp$value)))
})
##          ResponseID Domain sum
## 1 R_3BMHpaFPDvFw9Wl      E  76
## 2 R_3BMHpaFPDvFw9Wl     FG  35
## 3 R_3BMHpaFPDvFw9Wl     FI  33
## 4 R_3BMHpaFPDvFw9Wl     HS  76
## 5 R_3BMHpaFPDvFw9Wl      R  72
## 6 R_3BMHpaFPDvFw9Wl      S  62

The data will now be cast into the right format for a linear regression: one row per item, with one column for each scale.

# Cast back to wide format: one row per respondent, domain and question, with
# the three scales spread into their own columns and filled from `value`.
dataQc <- dcast(
  data = dataQm,
  formula = as.formula(paste(
    paste(
      c(
        "ResponseID", "ResponseSet", "Name", "ExternalDataReference",
        "EmailAddress", "IPAddress", "Status", "StartDate", "EndDate",
        "Finished", "Domain", "Question"
      ),
      collapse = " + "
    ),
    "~ Scale"
  )),
  value.var = "value"
)

This is how the last 5 columns of your cast data frame should look:

# Preview the item keys and the three scale columns. They are selected by
# name rather than by position (11:15), so the preview stays correct if the
# number of respondent-level columns ahead of them ever changes.
head(dataQc[, c(
  "Domain", "Question", "ExpectedBenefits", "RiskPerceptions", "RiskTaking"
)])
##   Domain Question ExpectedBenefits RiskPerceptions RiskTaking
## 1      E       10                6               5          3
## 2      E       16                4               5          4
## 3      E       29                5               3          3
## 4      E       30                6               3          6
## 5      E        6                3               3          4
## 6      E        9                5               4          4

Next we will regress “Risk Taking” on “Risk Perceptions” and “Expected Benefits”, separately for each domain and each subject, to calculate risk attitude.

#' Fit the risk-taking regression on one subset of the cast data
#'
#' @param dataQc A data frame with numeric columns `RiskTaking`,
#'   `RiskPerceptions` and `ExpectedBenefits`.
#' @return The fitted `lm` object for
#'   `RiskTaking ~ RiskPerceptions + ExpectedBenefits`.
model <- function(dataQc) {
  # The formula is written inline so that the printed Call shows it in full.
  fit <- lm(
    formula = RiskTaking ~ RiskPerceptions + ExpectedBenefits,
    data = dataQc
  )
  fit
}

# Fit one regression per domain and respondent; the result is a list of lm
# fits with one element per Domain/ResponseID combination.
dataQc.lm <- dlply(
  .data = dataQc,
  .variables = c("Domain", "ResponseID"),
  .fun = model
)

Your regression results should look like these:

# Print the first fitted models (at most six) from the list.
head(dataQc.lm, n = 6L)
## $E.R_3BMHpaFPDvFw9Wl
## 
## Call:
## lm(formula = RiskTaking ~ RiskPerceptions + ExpectedBenefits, 
##     data = dataQc)
## 
## Coefficients:
##      (Intercept)   RiskPerceptions  ExpectedBenefits  
##            4.747            -0.448             0.201  
## 
## 
## $FG.R_3BMHpaFPDvFw9Wl
## 
## Call:
## lm(formula = RiskTaking ~ RiskPerceptions + ExpectedBenefits, 
##     data = dataQc)
## 
## Coefficients:
##      (Intercept)   RiskPerceptions  ExpectedBenefits  
##            1.000             1.333            -0.667  
## 
## 
## $FI.R_3BMHpaFPDvFw9Wl
## 
## Call:
## lm(formula = RiskTaking ~ RiskPerceptions + ExpectedBenefits, 
##     data = dataQc)
## 
## Coefficients:
##      (Intercept)   RiskPerceptions  ExpectedBenefits  
##             1.75              0.50             -0.25  
## 
## 
## $HS.R_3BMHpaFPDvFw9Wl
## 
## Call:
## lm(formula = RiskTaking ~ RiskPerceptions + ExpectedBenefits, 
##     data = dataQc)
## 
## Coefficients:
##      (Intercept)   RiskPerceptions  ExpectedBenefits  
##         -0.00806           0.21774           0.58065  
## 
## 
## $R.R_3BMHpaFPDvFw9Wl
## 
## Call:
## lm(formula = RiskTaking ~ RiskPerceptions + ExpectedBenefits, 
##     data = dataQc)
## 
## Coefficients:
##      (Intercept)   RiskPerceptions  ExpectedBenefits  
##           4.0817            0.0722           -0.2331  
## 
## 
## $S.R_3BMHpaFPDvFw9Wl
## 
## Call:
## lm(formula = RiskTaking ~ RiskPerceptions + ExpectedBenefits, 
##     data = dataQc)
## 
## Coefficients:
##      (Intercept)   RiskPerceptions  ExpectedBenefits  
##            2.471            -0.277             0.645