This repository has been archived by the owner on Jan 13, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
/
reduce_questions.R
86 lines (65 loc) · 2.63 KB
/
reduce_questions.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
# install.packages('ltm') # logistic distribution estimation
library(ltm)
# Goal: Reduce the 25-item IQ test to a short form 9-item IQ test
# Data: http://openpsychometrics.org/_rawdata/IQ1.zip
# load dataset
IQ1 <- read.csv('./IQ1/data.csv')
head(IQ1)
# select questions only
IQbin <- IQ1[, 1:25]
head(IQbin)
# a score of 10 means that the person answered correctly
# any score different from (less than) 10 indicates the person
# did not fully answer the question
IQbin[IQbin < 10] <- 0
IQbin[IQbin == 10] <- 1
head(IQbin)
# Fit Binary Unidimensional IRT 2PL model
IRT2pl <- ltm(IQbin ~ z1, IRT.param = TRUE)
# Step 1: Remove misfitting items
# return the goodness of fit of the items with chi-squared test
# if p-value < 0.05 then item misfits
fit <- item.fit(IRT2pl)
fit
# put p-values along with item names into a dataframe
fit_pv <- as.data.frame(fit$p.values)
fit_pv
# take a subset of items and p-values by excluding the misfitting values
fit_pv <- subset(fit_pv, `fit$p.values` >= 0.05)
fit_pv
plot(IRT2pl, type='IIC') # for all items
plot(IRT2pl, type='IIC', items=0) # sum for all items
# at each level of theta, what is the amount of information in each question
info <- as.data.frame(list(plot(IRT2pl, type='IIC')))
info
# Step 2: Select items that provide the most information at given levels
# of theta. This would allow the short form test to provide information
# for students with low, medium, and high levels of theta (-1.5, 0, +1.5).
# take the row at theta ~ -1.5, 0, 1.5
low_abl <- t(as.data.frame(info[31, 2:26]))
med_abl <- t(as.data.frame(info[51, 2:26]))
high_abl <- t(as.data.frame(info[70, 2:26]))
# merge non-misfit items to information dataframe
fit_pv1 <- merge(fit_pv, low_abl, by=0, all.x=TRUE)
fit_pv2 <- merge(fit_pv, med_abl, by=0, all.x=TRUE)
fit_pv3 <- merge(fit_pv, high_abl, by=0, all.x=TRUE)
# col1 = item number
# col2 = how well it fits the model
# col3 = the information that question has
fit_pv1
# sort each dataframe by the highest information values. This would allow
# for the selection of the items with the most information at each level of
# ability. By doing so, we can select the top-most 3 items from each
# dataframe (which represents a level of theta) and
# create the short form test that has 9 items
fit_pv1[order(-fit_pv1$`31`),][1:5,]
fit_pv2[order(-fit_pv2$`51`),][1:5,]
fit_pv3[order(-fit_pv3$`70`),][1:5,]
# select the 9 most informative items
IQbin.9 <- IQbin[, c(16, 15, 3, 10, 19, 24, 18, 12, 23)]
# fit 2PL model
IRT2pl_9 <- ltm(IQbin.9 ~ z1, IRT.param = TRUE)
par(mfrow=c(1,2)) # set the plotting area into a 1*2 array
plot(IRT2pl, type='IIC', items=0)
plot(IRT2pl_9, type='IIC', items=0)
par(mfrow=c(1,1))