<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article
  PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD with MathML3 v1.2 20190208//EN" "JATS-journalpublishing1-mathml3.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:ali="http://www.niso.org/schemas/ali/1.0/" article-type="research-article" dtd-version="1.2" xml:lang="en">
<front>
<journal-meta><journal-id journal-id-type="publisher-id">METH</journal-id><journal-id journal-id-type="nlm-ta">Methodology</journal-id>
<journal-title-group>
<journal-title>Methodology</journal-title><abbrev-journal-title abbrev-type="pubmed">Methodology</abbrev-journal-title>
</journal-title-group>
<issn pub-type="ppub">1614-1881</issn>
<issn pub-type="epub">1614-2241</issn>
<publisher><publisher-name>PsychOpen</publisher-name></publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="publisher-id">meth.16999</article-id>
<article-id pub-id-type="doi">10.5964/meth.16999</article-id>
<article-categories>
<subj-group subj-group-type="heading"><subject>Original Article</subject></subj-group>

<subj-group subj-group-type="badge">
<subject>Data</subject>
<subject>Code</subject>
<subject>Materials</subject>
</subj-group>

</article-categories>
<title-group>
<article-title>Analyzing Group Differences and Measurement Fairness in Process Data: A Sequential Response Model With Covariates</article-title>
<alt-title alt-title-type="right-running">Group Differences and Fairness via Process Data</alt-title>
<alt-title specific-use="APA-reference-style" xml:lang="en">Analyzing group differences and measurement fairness in process data: A sequential response model with covariates</alt-title>
</title-group>
<contrib-group>
<contrib contrib-type="author"><contrib-id contrib-id-type="orcid" authenticated="false">https://orcid.org/0000-0001-7604-9802</contrib-id><name name-style="western"><surname>Han</surname><given-names>Yuting</given-names></name><xref ref-type="aff" rid="aff1"><sup>1</sup></xref><xref ref-type="aff" rid="aff2"><sup>2</sup></xref><xref ref-type="aff" rid="aff3"><sup>3</sup></xref></contrib>
<contrib contrib-type="author"><contrib-id contrib-id-type="orcid" authenticated="false">https://orcid.org/0000-0002-2051-5453</contrib-id><name name-style="western"><surname>Ji</surname><given-names>Feng</given-names></name><xref ref-type="aff" rid="aff4"><sup>4</sup></xref></contrib>
<contrib contrib-type="author"><contrib-id contrib-id-type="orcid" authenticated="false">https://orcid.org/0000-0002-7215-2324</contrib-id><name name-style="western"><surname>Chen</surname><given-names>Yunxiao</given-names></name><xref ref-type="aff" rid="aff5"><sup>5</sup></xref></contrib>
<contrib contrib-type="author"><contrib-id contrib-id-type="orcid" authenticated="false">https://orcid.org/0009-0009-0745-9918</contrib-id><name name-style="western"><surname>Gan</surname><given-names>Kaiyu</given-names></name><xref ref-type="aff" rid="aff6"><sup>6</sup></xref></contrib>
<contrib contrib-type="author" corresp="yes"><contrib-id contrib-id-type="orcid" authenticated="false">https://orcid.org/0000-0002-3472-9102</contrib-id><name name-style="western"><surname>Liu</surname><given-names>Hongyun</given-names></name><xref ref-type="corresp" rid="cor1">*</xref><xref ref-type="aff" rid="aff6"><sup>6</sup></xref></contrib>
<contrib contrib-type="editor">
<name>
	<surname>Nájera Álvarez</surname>
	<given-names>Pablo</given-names>
</name>
<xref ref-type="aff" rid="aff7"/>
</contrib>
<aff id="aff1"><label>1</label><institution content-type="dept">Cognitive Science and Allied Health School</institution>, <institution>Beijing Language and Culture University</institution>, <addr-line><city>Beijing</city></addr-line>, <country country="CN">China</country></aff>
<aff id="aff2"><label>2</label><institution content-type="dept">Institute of Life and Health Sciences</institution>, <institution>Beijing Language and Culture University</institution>, <addr-line><city>Beijing</city></addr-line>, <country country="CN">China</country></aff>
<aff id="aff3"><label>3</label><institution content-type="dept">Key Laboratory of Language and Cognitive Science (Ministry of Education)</institution>, <institution>Beijing Language and Culture University</institution>, <addr-line><city>Beijing</city></addr-line>, <country country="CN">China</country></aff>
	<aff id="aff4"><label>4</label><institution content-type="dept">Department of Applied Psychology and Human Development</institution>, <institution>University of Toronto</institution>, <addr-line><city>Toronto</city></addr-line>, <addr-line><state>ON</state></addr-line>, <country country="CA">Canada</country></aff>
<aff id="aff5"><label>5</label><institution content-type="dept">Department of Statistics</institution>, <institution>London School of Economics and Political Science</institution>, <addr-line><city>London</city></addr-line>, <country country="GB">United Kingdom</country></aff>
<aff id="aff6"><label>6</label><institution content-type="dept">Beijing Key Laboratory of Applied Experimental Psychology, National Demonstration Center for Experimental Psychology Education (Beijing Normal University), Faculty of Psychology</institution>, <institution>Beijing Normal University</institution>, <addr-line><city>Beijing</city></addr-line>, <country country="CN">China</country></aff>
	<aff id="aff7">Universidad Pontificia Comillas, Madrid, <country country="ES">Spain</country></aff>
</contrib-group>
<author-notes>
<corresp id="cor1"><label>*</label>Faculty of Psychology, Beijing Normal University, No. 19, XinJieKouWai St., HaiDian District, Beijing, People’s Republic of China. <email xlink:href="mailto:hyliu@bnu.edu.cn">hyliu@bnu.edu.cn</email></corresp>
</author-notes>
<pub-date date-type="pub" publication-format="electronic"><day>27</day><month>03</month><year>2026</year></pub-date>
<pub-date date-type="collection" publication-format="electronic"><year>2026</year></pub-date>
<volume>22</volume>
<issue>1</issue>

<fpage>1</fpage>
<lpage>26</lpage>
<history>
<date date-type="received">
<day>11</day>
<month>02</month>
<year>2025</year>
</date>
<date date-type="accepted">
<day>04</day>
<month>11</month>
<year>2025</year>
</date>
</history>
<permissions><copyright-year>2026</copyright-year><copyright-holder>Han, Ji, Chen et al.</copyright-holder><license license-type="open-access" specific-use="CC BY 4.0" xlink:href="https://creativecommons.org/licenses/by/4.0/"><ali:license_ref>https://creativecommons.org/licenses/by/4.0/</ali:license_ref><license-p>This is an open access article distributed under the terms of the Creative Commons Attribution 4.0 International License, CC BY 4.0, which permits unrestricted use, distribution, and reproduction in any medium, provided the original work is properly cited.</license-p></license></permissions>
<abstract>
<p>This article introduces the sequential response model with covariates (SRM-C) for analyzing process data, with emphasis on three key capabilities: detecting potential measurement bias in response processes, evaluating group differences in ability distributions, and improving parameter estimation precision. The SRM-C combines measurement and structural components, with the measurement component modeling response sequences conditional on abilities and covariates, and the structural component characterizing group-specific ability distributions. Sparsity assumptions implemented through horseshoe prior distributions address identification issues within the Bayesian framework. Monte Carlo simulations demonstrated robust parameter recovery and effective differential item functioning (DIF) detection. An empirical analysis of PISA problem-solving data illustrated the model’s utility in distinguishing ability differences from potential measurement bias. The SRM-C offers a comprehensive framework for understanding group differences in process data while ensuring measurement fairness.</p>
</abstract>
<kwd-group kwd-group-type="author"><kwd>computer-based assessment</kwd><kwd>process data</kwd><kwd>differential item functioning</kwd><kwd>measurement invariance</kwd><kwd>Bayesian regularization</kwd></kwd-group>

</article-meta>
</front>
<body>
	<sec sec-type="intro" id="intro"><title/>	
<p>Computer-based assessments (CBAs) have emerged as a transformative approach in psychological and educational measurement, particularly for evaluating higher-order cognitive skills (<xref ref-type="bibr" rid="r29">Liu et al., 2018</xref>; <xref ref-type="bibr" rid="r54">Shute &amp; Moore, 2017</xref>). The adoption of CBAs has been driven by their capacity to provide more authentic assessment contexts while reducing test anxiety through enhanced engagement (<xref ref-type="bibr" rid="r2">Banfield &amp; Wilkerson, 2014</xref>; <xref ref-type="bibr" rid="r28">Li et al., 2015</xref>). This shift toward computer-based evaluation is evidenced by the implementation of interactive assessment systems in major international educational initiatives, including the Programme for International Student Assessment (PISA) in 2012, 2015, and 2018 (<xref ref-type="bibr" rid="r42">OECD, 2014</xref>, <xref ref-type="bibr" rid="r44">2017</xref>), the Programme for the International Assessment of Adult Competencies (PIAAC) in 2012 (<xref ref-type="bibr" rid="r15">Goodman et al., 2013</xref>; <xref ref-type="bibr" rid="r43">OECD, 2016</xref>; <xref ref-type="bibr" rid="r52">Schleicher, 2008</xref>), the Assessment and Teaching of 21<sup>st</sup> Century Skills project (ATC21S) (<xref ref-type="bibr" rid="r16">Griffin et al., 2012</xref>), and the National Assessment of Educational Progress (NAEP) (<xref ref-type="bibr" rid="r41">National Center for Education Statistics, 2014</xref>).</p>
<p>CBAs generate rich process data that enables comprehensive assessment validation, enhanced measurement precision, and detailed analysis of response patterns, group differences, and behavioral patterns (<xref ref-type="bibr" rid="r11">Ercikan &amp; Pellegrino, 2017</xref>; <xref ref-type="bibr" rid="r36">Mislevy et al., 2014</xref>). Researchers have leveraged this process data to evaluate problem-solving abilities through various methodological approaches (<xref ref-type="bibr" rid="r22">Hesse et al., 2015</xref>; <xref ref-type="bibr" rid="r55">Siddiq et al., 2017</xref>; <xref ref-type="bibr" rid="r62">Xiao et al., 2022</xref>). In tasks with finite state spaces, response sequences can be modeled as stochastic processes, where psychometric models incorporate both frequency and correctness of state transitions to estimate underlying abilities (<xref ref-type="bibr" rid="r9">Chen, 2020</xref>; <xref ref-type="bibr" rid="r12">Fu et al., 2023</xref>; <xref ref-type="bibr" rid="r19">Han et al., 2022</xref>; <xref ref-type="bibr" rid="r20">Han &amp; Wilson, 2022</xref>; <xref ref-type="bibr" rid="r26">LaMar, 2018</xref>; <xref ref-type="bibr" rid="r53">Shu et al., 2017</xref>; <xref ref-type="bibr" rid="r61">Xiao &amp; Liu, 2024</xref>; <xref ref-type="bibr" rid="r64">Zhan &amp; Qiao, 2022</xref>). These approaches offer interpretable ability estimates while utilizing the complete response process.</p>
		<p>Test fairness, particularly differential item functioning (DIF), is a critical consideration in educational and psychological assessment. DIF occurs when examinees of comparable ability levels from different demographic groups exhibit systematic differences in their item responses (<xref ref-type="bibr" rid="r1">American Educational Research Association et al., 2014</xref>). The methodological framework for DIF detection encompasses diverse approaches, including the Mantel-Haenszel procedure (<xref ref-type="bibr" rid="r23">Holland &amp; Thayer, 1988</xref>), item response theory-based methods (<xref ref-type="bibr" rid="r30">Lord, 1980</xref>; <xref ref-type="bibr" rid="r49">Raju, 1988</xref>), logistic regression techniques (<xref ref-type="bibr" rid="r57">Swaminathan &amp; Rogers, 1990</xref>), likelihood ratio tests (<xref ref-type="bibr" rid="r59">Thissen et al., 1993</xref>), and graphical procedures (<xref ref-type="bibr" rid="r31">Magis et al., 2010</xref>; <xref ref-type="bibr" rid="r63">Yuan et al., 2021</xref>). More recently, LASSO-type regularized estimation procedures have been developed to address model selection and parameter estimation simultaneously (<xref ref-type="bibr" rid="r3">Bauer et al., 2020</xref>; <xref ref-type="bibr" rid="r4">Belzak &amp; Bauer, 2020</xref>; <xref ref-type="bibr" rid="r24">Huang, 2018</xref>; <xref ref-type="bibr" rid="r32">Magis et al., 2015</xref>; <xref ref-type="bibr" rid="r51">Schauberger &amp; Mair, 2020</xref>; <xref ref-type="bibr" rid="r60">Tutz &amp; Schauberger, 2015</xref>). 
Within the Bayesian paradigm, regularization effects comparable to LASSO have been achieved through specialized prior distributions, including the Laplace prior (<xref ref-type="bibr" rid="r8">Casella et al., 2010</xref>; <xref ref-type="bibr" rid="r45">Park &amp; Casella, 2008</xref>), spike-and-slab prior (<xref ref-type="bibr" rid="r37">Mitchell &amp; Beauchamp, 1988</xref>), and horseshoe prior (<xref ref-type="bibr" rid="r6">Carvalho et al., 2009</xref>, <xref ref-type="bibr" rid="r7">2010</xref>; <xref ref-type="bibr" rid="r46">Piironen &amp; Vehtari, 2017</xref>; <xref ref-type="bibr" rid="r47">Polson &amp; Scott, 2011</xref>), enabling anchor-free DIF analysis.</p>
<p>While traditional DIF detection methods analyze item responses, CBAs present unique challenges for fairness evaluation due to their complex process data structure. Unlike conventional test responses, process data typically consists of variable-length action sequences, necessitating more sophisticated analytical approaches for identifying potential DIF. To our knowledge, DIF detection methods applicable to process data remain limited, particularly regarding the simultaneous estimation of latent abilities and detection of DIF while controlling for confounding. To address these methodological challenges, we propose the sequential response model with covariates (SRM-C), which makes several methodological contributions that distinguish it from existing approaches. First, unlike traditional DIF methods that analyze discrete item responses, the SRM-C detects differential functioning in sequential state transitions, accommodating the variable-length action sequences characteristic of process data. Second, the model simultaneously estimates group-specific ability distributions while controlling for potential measurement bias in transition processes, enabling separation of genuine ability differences from DIF effects. Third, the SRM-C implements anchor-free DIF detection through horseshoe priors adapted for transition matrices, reflecting the assumption that most state transitions are DIF-free. These innovations enable three key analytical capabilities: detecting potential measurement bias in response processes, evaluating group differences in ability distributions, and improving parameter estimation precision.</p>
<p>This article proceeds as follows: We first introduce the SRM-C and detail its Bayesian estimation framework. We then evaluate the model’s parameter recovery capabilities through simulation studies. The model’s practical utility is demonstrated through an empirical analysis of problem-solving process data, with particular attention to DIF detection. We conclude by discussing implications and future directions for process data analysis in educational measurement.</p></sec>
<sec sec-type="other1"><title>The Sequential Response Model With Covariates</title>
<p><xref ref-type="bibr" rid="r34">Mayer and Wittrock (2006)</xref> define problem solving as “cognitive processing directed at transforming a given situation into a goal situation when no obvious method of solution is available to the problem solver”. This process is inherently personalized (depending on the solver’s existing knowledge and abilities), cognitive (occurring within the solver’s cognitive system), goal-directed (aimed at achieving specific objectives), and process-based (involving sequential mental computations and representations) (<xref ref-type="bibr" rid="r33">Mayer, 1992</xref>; <xref ref-type="bibr" rid="r34">Mayer &amp; Wittrock, 2006</xref>). The comprehensive recording of external behavioral processes — represented by transitions between problem states from initial to goal states — provides essential information for measuring latent problem-solving ability.</p>
<p>In CBA, a task state is defined as the cumulative sum of system changes resulting from actions taken from the beginning to a given moment. This includes all factors associated with decisions made within the task, which together constitute complex performance (<xref ref-type="bibr" rid="r26">LaMar, 2018</xref>). Consequently, each action chosen is equivalent to selecting the subsequent state achievable within the current task state. In well-designed tasks, the reachable states in the next step are a finite set that depends on the current state (<xref ref-type="bibr" rid="r19">Han et al., 2022</xref>; <xref ref-type="bibr" rid="r61">Xiao &amp; Liu, 2024</xref>). <xref ref-type="fig" rid="f1">Figure 1</xref> illustrates a simplified state transition diagram where A denotes the initial state, B an intermediate state, C the target state, and # a premature termination state. Transitions between states occur through actions (<italic>a</italic><sub>1</sub>-<italic>a</italic><sub>4</sub>), where solid arrows indicate correct transitions (A→B, B→C) that progress toward the target state, while dotted arrows represent incorrect transitions that either deviate from or fail to advance toward the solution. The optimal solution path is represented by the state sequence A→B→C.</p><fig id="f1" position="anchor" fig-type="figure" orientation="portrait"><label>Figure 1</label><caption>
<title>State Transition Diagram for Task Response Process</title><p><italic>Note.</italic> A, B, C, and # represent initial, intermediate, target, and termination states, respectively. Actions <italic>a</italic><sub>1</sub>-<italic>a</italic><sub>4</sub> facilitate state transitions, with solid arrows indicating correct transitions and dotted arrows indicating incorrect ones.</p></caption><graphic xlink:href="meth.16999-f1" position="anchor" orientation="portrait"/></fig>
<sec><title>The Sequential Response Model</title>
	<p>The response process can be conceptualized as a sequence of task states following a temporal stochastic process with the conditional Markov property, grounded in problem-solving theory where an individual’s next action depends on both the current problem state and their latent problem-solving ability (<xref ref-type="bibr" rid="r53">Shu et al., 2017</xref>). For a task with finite states <bold><italic>s</italic></bold> = {<italic>s</italic><sub>1</sub>, <italic>s</italic><sub>2</sub>, <italic>s</italic><sub>3</sub>, … , <italic>s</italic><sub>z</sub>}, the sequential response model (SRM; <xref ref-type="bibr" rid="r19">Han et al., 2022</xref>) models the probability of transitioning to the next state <inline-formula><mml:math id="m1"><mml:mrow><mml:msub><mml:mi>S</mml:mi><mml:mrow><mml:mi>i</mml:mi><mml:mo>,</mml:mo><mml:mi>t</mml:mi><mml:mo>+</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:msub></mml:mrow></mml:math></inline-formula> conditional on the current state <inline-formula><mml:math id="m2"><mml:mrow><mml:msub><mml:mi>S</mml:mi><mml:mrow><mml:mi>i</mml:mi><mml:mo>,</mml:mo><mml:mi>t</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:math></inline-formula> and latent ability <inline-formula><mml:math id="m3"><mml:mrow><mml:msub><mml:mi>θ</mml:mi><mml:mi>i</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula>:</p><disp-formula id="e1"><mml:math id="m4"><mml:mrow><mml:mi>P</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:msub><mml:mi>S</mml:mi><mml:mrow><mml:mi>i</mml:mi><mml:mo>,</mml:mo><mml:mi>t</mml:mi><mml:mo>+</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:msub><mml:mi>s</mml:mi><mml:mi>k</mml:mi></mml:msub><mml:mo 
mathsize="40%">|</mml:mo><mml:msub><mml:mi>S</mml:mi><mml:mrow><mml:mi>i</mml:mi><mml:mo>,</mml:mo><mml:mi>t</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:msub><mml:mi>s</mml:mi><mml:mi>j</mml:mi></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>θ</mml:mi><mml:mi>i</mml:mi></mml:msub><mml:mo>,</mml:mo><mml:mi>a</mml:mi><mml:mo>,</mml:mo><mml:mi mathvariant="script">R</mml:mi><mml:mo stretchy="false">)</mml:mo><mml:mo>=</mml:mo><mml:mfrac><mml:mrow><mml:mi>exp</mml:mi><mml:mfenced><mml:mrow><mml:msub><mml:mi>a</mml:mi><mml:mrow><mml:mi>j</mml:mi><mml:mo>,</mml:mo><mml:mi>k</mml:mi></mml:mrow></mml:msub><mml:mo>+</mml:mo><mml:msubsup><mml:mi>I</mml:mi><mml:mrow><mml:mi>j</mml:mi><mml:mo>,</mml:mo><mml:mi>k</mml:mi></mml:mrow><mml:mo>+</mml:mo></mml:msubsup><mml:mo mathsize="60%">∙</mml:mo><mml:msub><mml:mi>θ</mml:mi><mml:mi>i</mml:mi></mml:msub></mml:mrow></mml:mfenced></mml:mrow><mml:mrow><mml:msub><mml:mstyle mathsize="90%" displaystyle="true"><mml:mo>∑</mml:mo></mml:mstyle><mml:mrow><mml:msub><mml:mi>s</mml:mi><mml:mi>h</mml:mi></mml:msub><mml:mo>∈</mml:mo><mml:msub><mml:mi>M</mml:mi><mml:mi>j</mml:mi></mml:msub></mml:mrow></mml:msub><mml:mi>exp</mml:mi><mml:mfenced><mml:mrow><mml:msub><mml:mi>a</mml:mi><mml:mrow><mml:mi>j</mml:mi><mml:mo>,</mml:mo><mml:mi>h</mml:mi></mml:mrow></mml:msub><mml:mo>+</mml:mo><mml:msubsup><mml:mi>I</mml:mi><mml:mrow><mml:mi>j</mml:mi><mml:mo>,</mml:mo><mml:mi>h</mml:mi></mml:mrow><mml:mo>+</mml:mo></mml:msubsup><mml:mo mathsize="60%">∙</mml:mo><mml:msub><mml:mi>θ</mml:mi><mml:mi>i</mml:mi></mml:msub></mml:mrow></mml:mfenced></mml:mrow></mml:mfrac><mml:mo>,</mml:mo><mml:msub><mml:mi>s</mml:mi><mml:mi>j</mml:mi></mml:msub><mml:mo>∈</mml:mo><mml:mi>s</mml:mi><mml:mtext> </mml:mtext><mml:mo>,</mml:mo><mml:msub><mml:mi>s</mml:mi><mml:mi>k</mml:mi></mml:msub><mml:mo>∈</mml:mo><mml:msub><mml:mi>M</mml:mi><mml:mi>j</mml:mi></mml:msub><mml:mo>,</mml:mo><mml:mtext> </mml:mtext></mml:mrow></mml:math><label>1</label></disp-formula>
	<p>where <inline-formula><mml:math id="m5"><mml:mrow><mml:msub><mml:mi>M</mml:mi><mml:mi>j</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula> denotes the set of reachable states from <inline-formula><mml:math id="m6"><mml:mrow><mml:msub><mml:mi>s</mml:mi><mml:mi>j</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula>. <inline-formula><mml:math id="m7"><mml:mrow><mml:msubsup><mml:mi>I</mml:mi><mml:mrow><mml:mi>j</mml:mi><mml:mo>,</mml:mo><mml:mi>k</mml:mi></mml:mrow><mml:mo>+</mml:mo></mml:msubsup></mml:mrow></mml:math></inline-formula> is a binary indicator (<inline-formula><mml:math id="m8"><mml:mn>1</mml:mn></mml:math></inline-formula> for correct, <inline-formula><mml:math id="m9"><mml:mrow><mml:mo>−</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:math></inline-formula> for incorrect transitions) specified a priori in <inline-formula><mml:math id="m10"><mml:mi mathvariant="script">R</mml:mi></mml:math></inline-formula>. The transition tendency parameter <inline-formula><mml:math id="m11"><mml:mrow><mml:msub><mml:mi>a</mml:mi><mml:mrow><mml:mi>j</mml:mi><mml:mo>,</mml:mo><mml:mi>k</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:math></inline-formula> captures task-specific characteristics — the inherent propensity for transitioning from state <inline-formula><mml:math id="m12"><mml:mrow><mml:msub><mml:mi>s</mml:mi><mml:mi>j</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula> to state <inline-formula><mml:math id="m13"><mml:mrow><mml:msub><mml:mi>s</mml:mi><mml:mi>k</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula>. It functions analogously to item difficulty parameters in traditional item response theory (IRT) models. Positive values indicate transitions that are relatively easy to occur, while negative values indicate more difficult transitions. Empirically, these parameters typically range from −3 to 3, consistent with standard IRT parameterization. 
When combined with the directional indicator <inline-formula><mml:math id="m14"><mml:mrow><mml:msubsup><mml:mi>I</mml:mi><mml:mrow><mml:mi>j</mml:mi><mml:mo>,</mml:mo><mml:mi>k</mml:mi></mml:mrow><mml:mo>+</mml:mo></mml:msubsup></mml:mrow></mml:math></inline-formula>, the model ensures that higher-ability individuals are more likely to make correct transitions (<inline-formula><mml:math id="m15"><mml:mrow><mml:msub><mml:mi>a</mml:mi><mml:mrow><mml:mi>j</mml:mi><mml:mo>,</mml:mo><mml:mi>k</mml:mi></mml:mrow></mml:msub><mml:mo>+</mml:mo><mml:msub><mml:mi>θ</mml:mi><mml:mi>i</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula> for correct transitions) and less likely to make errors (<inline-formula><mml:math id="m16"><mml:mrow><mml:msub><mml:mi>a</mml:mi><mml:mrow><mml:mi>j</mml:mi><mml:mo>,</mml:mo><mml:mi>k</mml:mi></mml:mrow></mml:msub><mml:mo>−</mml:mo><mml:msub><mml:mi>θ</mml:mi><mml:mi>i</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula> for incorrect transitions), consistent with established psychometric principles. Note that the SRM focuses on transition directionality (correct vs. incorrect) rather than discrimination, as the directional indicator <inline-formula><mml:math id="m17"><mml:mrow><mml:msubsup><mml:mi>I</mml:mi><mml:mrow><mml:mi>j</mml:mi><mml:mo>,</mml:mo><mml:mi>k</mml:mi></mml:mrow><mml:mo>+</mml:mo></mml:msubsup></mml:mrow></mml:math></inline-formula> already captures this information. The vector <inline-formula><mml:math id="m18"><mml:mi>a</mml:mi></mml:math></inline-formula> contains all transition tendency parameters. 
As a multinomial logit model, the SRM requires <inline-formula><mml:math id="m19"><mml:mrow><mml:munder><mml:mstyle mathsize="95%" displaystyle="true"><mml:mo>∑</mml:mo></mml:mstyle><mml:mrow><mml:msub><mml:mi>s</mml:mi><mml:mi>h</mml:mi></mml:msub><mml:mo>∈</mml:mo><mml:msub><mml:mi>M</mml:mi><mml:mi>j</mml:mi></mml:msub></mml:mrow></mml:munder><mml:msub><mml:mi>a</mml:mi><mml:mrow><mml:mi>j</mml:mi><mml:mo>,</mml:mo><mml:mi>h</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mn>0</mml:mn></mml:mrow></mml:math></inline-formula> for identification (<xref ref-type="bibr" rid="r35">McFadden, 1974</xref>; <xref ref-type="bibr" rid="r58">Thissen &amp; Steinberg, 1986</xref>).</p>
<sec><title>The Sequential Response Model With Covariates</title>
<p>For process data, we define differential item functioning (DIF) as systematic differences in state transition probabilities between groups of examinees with equal ability levels, attributable to construct-irrelevant variables. Building upon the foundational SRM framework, the SRM-C introduces three key extensions to address group differences and measurement fairness in process data. The measurement component incorporates covariate effects on state transition probabilities, the structural component allows for group-specific ability distributions, and the identification strategy employs horseshoe priors to enable anchor-free DIF detection without requiring pre-specified invariant transitions. For illustration, we present the model for a two-group scenario where the covariate is binary, with values 0 and 1 representing membership in the reference and focal groups, respectively.</p>
<sec><title>Measurement Model</title>
	<p>The measurement component of the SRM-C models the relationship between response sequences and latent abilities while accounting for task features and covariates. This component extends <xref ref-type="disp-formula" rid="e1">Equation (1)</xref> by incorporating covariate effects:</p><disp-formula id="e2"><mml:math id="m20"><mml:mrow><mml:mi>P</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:msub><mml:mi>S</mml:mi><mml:mrow><mml:mi>i</mml:mi><mml:mo>,</mml:mo><mml:mi>t</mml:mi><mml:mo>+</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:msub><mml:mi>s</mml:mi><mml:mi>k</mml:mi></mml:msub><mml:mo mathsize="40%">|</mml:mo><mml:msub><mml:mi>S</mml:mi><mml:mrow><mml:mi>i</mml:mi><mml:mo>,</mml:mo><mml:mi>t</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:msub><mml:mi>s</mml:mi><mml:mi>j</mml:mi></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>θ</mml:mi><mml:mi>i</mml:mi></mml:msub><mml:mo>,</mml:mo><mml:mi>a</mml:mi><mml:mo>,</mml:mo><mml:mi>b</mml:mi><mml:mo>,</mml:mo><mml:msub><mml:mi>x</mml:mi><mml:mi>i</mml:mi></mml:msub><mml:mo>,</mml:mo><mml:mi mathvariant="script">R</mml:mi><mml:mo stretchy="false">)</mml:mo><mml:mo>=</mml:mo><mml:mfrac><mml:mrow><mml:mtext>exp</mml:mtext><mml:mfenced close="]" open="["><mml:mrow><mml:msub><mml:mi>a</mml:mi><mml:mrow><mml:mi>j</mml:mi><mml:mo>,</mml:mo><mml:mi>k</mml:mi></mml:mrow></mml:msub><mml:mo>+</mml:mo><mml:msubsup><mml:mi>I</mml:mi><mml:mrow><mml:mi>j</mml:mi><mml:mo>,</mml:mo><mml:mi>k</mml:mi></mml:mrow><mml:mo>+</mml:mo></mml:msubsup><mml:mo mathsize="60%">∙</mml:mo><mml:msub><mml:mi>θ</mml:mi><mml:mi>i</mml:mi></mml:msub><mml:mo>+</mml:mo><mml:msub><mml:mi>b</mml:mi><mml:mrow><mml:mi>j</mml:mi><mml:mo>,</mml:mo><mml:mi>k</mml:mi></mml:mrow></mml:msub><mml:mo mathsize="60%">∙</mml:mo><mml:msub><mml:mi>x</mml:mi><mml:mi>i</mml:mi></mml:msub></mml:mrow></mml:mfenced></mml:mrow><mml:mrow><mml:msub><mml:mstyle mathsize="90%" 
displaystyle="true"><mml:mo>∑</mml:mo></mml:mstyle><mml:mrow><mml:msub><mml:mi>s</mml:mi><mml:mi>h</mml:mi></mml:msub><mml:mo>∈</mml:mo><mml:msub><mml:mi>M</mml:mi><mml:mi>j</mml:mi></mml:msub></mml:mrow></mml:msub><mml:mtext>exp</mml:mtext><mml:mfenced close="]" open="["><mml:mrow><mml:msub><mml:mi>a</mml:mi><mml:mrow><mml:mi>j</mml:mi><mml:mo>,</mml:mo><mml:mi>h</mml:mi></mml:mrow></mml:msub><mml:mo>+</mml:mo><mml:msubsup><mml:mi>I</mml:mi><mml:mrow><mml:mi>j</mml:mi><mml:mo>,</mml:mo><mml:mi>h</mml:mi></mml:mrow><mml:mo>+</mml:mo></mml:msubsup><mml:mo mathsize="60%">∙</mml:mo><mml:msub><mml:mi>θ</mml:mi><mml:mi>i</mml:mi></mml:msub><mml:mo>+</mml:mo><mml:msub><mml:mi>b</mml:mi><mml:mrow><mml:mi>j</mml:mi><mml:mo>,</mml:mo><mml:mi>h</mml:mi></mml:mrow></mml:msub><mml:mo mathsize="60%">∙</mml:mo><mml:msub><mml:mi>x</mml:mi><mml:mi>i</mml:mi></mml:msub></mml:mrow></mml:mfenced></mml:mrow></mml:mfrac><mml:mo>,</mml:mo><mml:mtext> </mml:mtext><mml:msub><mml:mi>s</mml:mi><mml:mi>j</mml:mi></mml:msub><mml:mo>∈</mml:mo><mml:mi>s</mml:mi><mml:mtext> </mml:mtext><mml:mo>,</mml:mo><mml:msub><mml:mi>s</mml:mi><mml:mi>k</mml:mi></mml:msub><mml:mo>∈</mml:mo><mml:msub><mml:mi>M</mml:mi><mml:mi>j</mml:mi></mml:msub><mml:mo>,</mml:mo><mml:mtext> </mml:mtext></mml:mrow></mml:math><label>2</label></disp-formula>
<p>where <inline-formula><mml:math id="m21"><mml:mrow><mml:msub><mml:mi>x</mml:mi><mml:mi>i</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula> denotes the covariate indicating group membership, and the covariate effect parameter <inline-formula><mml:math id="m22"><mml:mrow><mml:msub><mml:mi>b</mml:mi><mml:mrow><mml:mi>j</mml:mi><mml:mo>,</mml:mo><mml:mi>k</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:math></inline-formula> quantifies differential transition probabilities between groups at equal ability levels. When <inline-formula><mml:math id="m23"><mml:mrow><mml:msub><mml:mi>b</mml:mi><mml:mrow><mml:mi>j</mml:mi><mml:mo>,</mml:mo><mml:mi>k</mml:mi></mml:mrow></mml:msub><mml:mo>&gt;</mml:mo><mml:mn>0</mml:mn></mml:mrow></mml:math></inline-formula>, the focal group (<inline-formula><mml:math id="m24"><mml:mrow><mml:msub><mml:mi>x</mml:mi><mml:mi>i</mml:mi></mml:msub><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:math></inline-formula>) shows higher propensity for transition <inline-formula><mml:math id="m25"><mml:mrow><mml:msub><mml:mi>s</mml:mi><mml:mi>j</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula> → <inline-formula><mml:math id="m26"><mml:mrow><mml:msub><mml:mi>s</mml:mi><mml:mi>k</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula> compared to the reference group (<inline-formula><mml:math id="m27"><mml:mrow><mml:msub><mml:mi>x</mml:mi><mml:mi>i</mml:mi></mml:msub><mml:mo>=</mml:mo><mml:mn>0</mml:mn></mml:mrow></mml:math></inline-formula>) at the same ability level. This constitutes uniform DIF detection in the context of process data, where group differences manifest as horizontal shifts in transition probability functions. The vector <inline-formula><mml:math id="m28"><mml:mi>b</mml:mi></mml:math></inline-formula> contains all covariate effects, and other notation follows <xref ref-type="disp-formula" rid="e1">Equation (1)</xref>.</p></sec>
<sec><title>Structural Model</title>
<p>The structural component characterizes group-specific ability distributions:</p><disp-formula id="e3"><mml:math id="m29"><mml:mrow><mml:msub><mml:mi>θ</mml:mi><mml:mi>i</mml:mi></mml:msub><mml:mo>|</mml:mo><mml:msub><mml:mi>x</mml:mi><mml:mi>i</mml:mi></mml:msub><mml:mo>~</mml:mo><mml:mi>N</mml:mi><mml:mfenced><mml:mrow><mml:mi>μ</mml:mi><mml:msub><mml:mi>x</mml:mi><mml:mi>i</mml:mi></mml:msub><mml:mo>,</mml:mo><mml:msup><mml:mi>σ</mml:mi><mml:mrow><mml:mn>2</mml:mn><mml:msub><mml:mi>x</mml:mi><mml:mi>i</mml:mi></mml:msub></mml:mrow></mml:msup></mml:mrow></mml:mfenced><mml:mo>.</mml:mo><mml:mtext> </mml:mtext></mml:mrow></mml:math><label>3</label></disp-formula>
<p>Abilities in the reference group (<inline-formula><mml:math id="m30"><mml:mrow><mml:msub><mml:mi>x</mml:mi><mml:mi>i</mml:mi></mml:msub><mml:mo>=</mml:mo><mml:mn>0</mml:mn></mml:mrow></mml:math></inline-formula>) follow a standard normal distribution to identify the location and scale of the latent trait, while abilities in the focal group (<inline-formula><mml:math id="m31"><mml:mrow><mml:msub><mml:mi>x</mml:mi><mml:mi>i</mml:mi></mml:msub><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:math></inline-formula>) follow <inline-formula><mml:math id="m32"><mml:mrow><mml:mi>N</mml:mi><mml:mfenced><mml:mrow><mml:mi>μ</mml:mi><mml:mo>,</mml:mo><mml:msup><mml:mi>σ</mml:mi><mml:mn>2</mml:mn></mml:msup></mml:mrow></mml:mfenced></mml:mrow></mml:math></inline-formula>, where <inline-formula><mml:math id="m33"><mml:mi>μ</mml:mi></mml:math></inline-formula> and <inline-formula><mml:math id="m34"><mml:mi>σ</mml:mi></mml:math></inline-formula> are group-specific parameters. While the normality assumption represents a simplification of potentially more complex ability distributions, empirical research consistently demonstrates that cognitive abilities approximate normal distributions in educational contexts, and this assumption aligns with standard practice in psychometric modeling, including IRT and structural equation modeling approaches. In addition, allowing different location and scale parameters across groups accommodates realistic scenarios where groups differ in both mean ability and variability — a common finding in cross-cultural and demographic studies — thereby maintaining sufficient flexibility while avoiding overfitting risks associated with more complex distributional assumptions.</p>
<p>For an examinee with sequence length <inline-formula><mml:math id="m35"><mml:mrow><mml:msub><mml:mi>T</mml:mi><mml:mi>i</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula>, the complete SRM-C combines <xref ref-type="disp-formula" rid="e2">Equations (2)</xref> and <xref ref-type="disp-formula" rid="e3">(3)</xref>. <xref ref-type="fig" rid="f2">Figure 2</xref> presents the path diagram of the model. The SRM-C framework is specifically designed for tasks with well-defined state spaces and clear transition correctness criteria, such as computer-based problem-solving assessments. The model’s strength lies in its ability to simultaneously account for process complexity and group heterogeneity while maintaining interpretability.</p><fig id="f2" position="anchor" fig-type="figure" orientation="portrait"><label>Figure 2</label><caption>
<title>Path Diagram of the SRM-C</title><p><italic>Note.</italic> Subscript <inline-formula><mml:math id="m36"><mml:mi>i</mml:mi></mml:math></inline-formula> is omitted for simplicity. Dashed lines from <inline-formula><mml:math id="m37"><mml:mi>x</mml:mi></mml:math></inline-formula> to <inline-formula><mml:math id="m38"><mml:mrow><mml:msub><mml:mi>S</mml:mi><mml:mi>t</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula> represent DIF effects.</p></caption><graphic xlink:href="meth.16999-f2" position="anchor" orientation="portrait"/></fig></sec>
<sec><title>Model Identification</title>
	<p>Similar to the SRM, the SRM-C requires constraints <inline-formula><mml:math id="m39"><mml:mrow><mml:munder><mml:mstyle mathsize="90%" displaystyle="true"><mml:mo>∑</mml:mo></mml:mstyle><mml:mrow><mml:msub><mml:mi>s</mml:mi><mml:mi>h</mml:mi></mml:msub><mml:mo>∈</mml:mo><mml:msub><mml:mi>M</mml:mi><mml:mi>j</mml:mi></mml:msub></mml:mrow></mml:munder><mml:msub><mml:mi>a</mml:mi><mml:mrow><mml:mi>j</mml:mi><mml:mo>,</mml:mo><mml:mi>h</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mn>0</mml:mn></mml:mrow></mml:math></inline-formula> and <inline-formula><mml:math id="m40"><mml:mrow><mml:munder><mml:mstyle mathsize="90%" displaystyle="true"><mml:mo>∑</mml:mo></mml:mstyle><mml:mrow><mml:msub><mml:mi>s</mml:mi><mml:mi>h</mml:mi></mml:msub><mml:mo>∈</mml:mo><mml:msub><mml:mi>M</mml:mi><mml:mi>j</mml:mi></mml:msub></mml:mrow></mml:munder><mml:msub><mml:mi>b</mml:mi><mml:mrow><mml:mi>j</mml:mi><mml:mo>,</mml:mo><mml:mi>h</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mn>0</mml:mn></mml:mrow></mml:math></inline-formula> for identification. 
The marginal likelihood function is:</p><disp-formula id="e4"><mml:math id="m41"><mml:mrow><mml:mi>L</mml:mi><mml:mo>=</mml:mo><mml:munderover><mml:mstyle mathsize="90%" displaystyle="true"><mml:mo>∏</mml:mo></mml:mstyle><mml:mrow><mml:mi>i</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mi>n</mml:mi></mml:munderover><mml:mo>∫</mml:mo><mml:mo stretchy="false">(</mml:mo><mml:munderover><mml:mstyle mathsize="90%" displaystyle="true"><mml:mo>∏</mml:mo></mml:mstyle><mml:mrow><mml:mi>t</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:msub><mml:mi>T</mml:mi><mml:mi>i</mml:mi></mml:msub><mml:mo>−</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:munderover><mml:mfrac><mml:mrow><mml:mtext>exp</mml:mtext><mml:mfenced><mml:mrow><mml:msub><mml:mi>a</mml:mi><mml:mrow><mml:mi>j</mml:mi><mml:mo>,</mml:mo><mml:mi>k</mml:mi></mml:mrow></mml:msub><mml:mo>+</mml:mo><mml:msubsup><mml:mi>I</mml:mi><mml:mrow><mml:mi>j</mml:mi><mml:mo>,</mml:mo><mml:mi>k</mml:mi></mml:mrow><mml:mo>+</mml:mo></mml:msubsup><mml:mo mathsize="60%">∙</mml:mo><mml:msub><mml:mi>θ</mml:mi><mml:mi>i</mml:mi></mml:msub><mml:mo>+</mml:mo><mml:msub><mml:mi>b</mml:mi><mml:mrow><mml:mi>j</mml:mi><mml:mo>,</mml:mo><mml:mi>k</mml:mi></mml:mrow></mml:msub><mml:mo mathsize="60%">∙</mml:mo><mml:msub><mml:mi>x</mml:mi><mml:mi>i</mml:mi></mml:msub></mml:mrow></mml:mfenced></mml:mrow><mml:mrow><mml:msub><mml:mstyle mathsize="90%" displaystyle="true"><mml:mo>∑</mml:mo></mml:mstyle><mml:mrow><mml:msub><mml:mi>s</mml:mi><mml:mi>h</mml:mi></mml:msub><mml:mo>∈</mml:mo><mml:msub><mml:mi>M</mml:mi><mml:mi>j</mml:mi></mml:msub></mml:mrow></mml:msub><mml:mtext>exp</mml:mtext><mml:mfenced><mml:mrow><mml:msub><mml:mi>a</mml:mi><mml:mrow><mml:mi>j</mml:mi><mml:mo>,</mml:mo><mml:mi>h</mml:mi></mml:mrow></mml:msub><mml:mo>+</mml:mo><mml:msubsup><mml:mi>I</mml:mi><mml:mrow><mml:mi>j</mml:mi><mml:mo>,</mml:mo><mml:mi>h</mml:mi></mml:mrow><mml:mo>+</mml:mo></mml:msubsup><mml:mo 
mathsize="60%">∙</mml:mo><mml:msub><mml:mi>θ</mml:mi><mml:mi>i</mml:mi></mml:msub><mml:mo>+</mml:mo><mml:msub><mml:mi>b</mml:mi><mml:mrow><mml:mi>j</mml:mi><mml:mo>,</mml:mo><mml:mi>h</mml:mi></mml:mrow></mml:msub><mml:mo mathsize="60%">∙</mml:mo><mml:msub><mml:mi>x</mml:mi><mml:mi>i</mml:mi></mml:msub></mml:mrow></mml:mfenced></mml:mrow></mml:mfrac><mml:mo stretchy="false">)</mml:mo><mml:mfrac><mml:mn>1</mml:mn><mml:mrow><mml:msup><mml:mi>σ</mml:mi><mml:msub><mml:mi>x</mml:mi><mml:mi>i</mml:mi></mml:msub></mml:msup><mml:msqrt><mml:mrow><mml:mn>2</mml:mn><mml:mi>π</mml:mi></mml:mrow></mml:msqrt></mml:mrow></mml:mfrac><mml:mtext>exp</mml:mtext><mml:mfenced><mml:mrow><mml:mo>−</mml:mo><mml:mfrac><mml:mrow><mml:msup><mml:mrow><mml:mfenced><mml:mrow><mml:msub><mml:mi>θ</mml:mi><mml:mi>i</mml:mi></mml:msub><mml:mo>−</mml:mo><mml:mi>μ</mml:mi><mml:msub><mml:mi>x</mml:mi><mml:mi>i</mml:mi></mml:msub></mml:mrow></mml:mfenced></mml:mrow><mml:mn>2</mml:mn></mml:msup></mml:mrow><mml:mrow><mml:mn>2</mml:mn><mml:msup><mml:mi>σ</mml:mi><mml:mrow><mml:mn>2</mml:mn><mml:msub><mml:mi>x</mml:mi><mml:mi>i</mml:mi></mml:msub></mml:mrow></mml:msup></mml:mrow></mml:mfrac></mml:mrow></mml:mfenced><mml:mi>d</mml:mi><mml:msub><mml:mi>θ</mml:mi><mml:mi>i</mml:mi></mml:msub><mml:mo>.</mml:mo><mml:mtext> </mml:mtext></mml:mrow></mml:math><label>4</label></disp-formula>
	<p>The model exhibits a location shift invariance: for any constant <inline-formula><mml:math id="m42"><mml:mi>c</mml:mi></mml:math></inline-formula>, replacing <inline-formula><mml:math id="m43"><mml:mi>μ</mml:mi></mml:math></inline-formula> with <inline-formula><mml:math id="m44"><mml:mrow><mml:mi>μ</mml:mi><mml:mo>+</mml:mo><mml:mi>c</mml:mi></mml:mrow></mml:math></inline-formula> and <inline-formula><mml:math id="m45"><mml:mrow><mml:msub><mml:mi>b</mml:mi><mml:mrow><mml:mi>j</mml:mi><mml:mo>,</mml:mo><mml:mi>h</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:math></inline-formula> with <inline-formula><mml:math id="m46"><mml:mrow><mml:msub><mml:mi>b</mml:mi><mml:mrow><mml:mi>j</mml:mi><mml:mo>,</mml:mo><mml:mi>h</mml:mi></mml:mrow></mml:msub><mml:mo>−</mml:mo><mml:mi>c</mml:mi><mml:mo mathsize="60%">∙</mml:mo><mml:msubsup><mml:mi>I</mml:mi><mml:mrow><mml:mi>j</mml:mi><mml:mo>,</mml:mo><mml:mi>h</mml:mi></mml:mrow><mml:mo>+</mml:mo></mml:msubsup></mml:mrow></mml:math></inline-formula> yields an equivalent model. To resolve this non-identification issue (<xref ref-type="bibr" rid="r50">San Martín, 2016</xref>), we adopt a sparsity assumption that most state transitions are DIF-free, implemented through a horseshoe prior distribution. This approach aligns with common DIF detection methods that do not require anchor items (<xref ref-type="bibr" rid="r10">Chen et al., 2023</xref>) and offers favorable theoretical properties with straightforward implementation (<xref ref-type="bibr" rid="r6">Carvalho et al., 2009</xref>, <xref ref-type="bibr" rid="r7">2010</xref>).</p></sec></sec>
<sec><title>Bayesian Estimation</title>
	<p>Under the conditional first-order Markov assumption, the joint probability of a state sequence <inline-formula><mml:math id="m47"><mml:mrow><mml:msub><mml:mi>S</mml:mi><mml:mi>i</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula> is:</p><disp-formula id="e5"><mml:math id="m48"><mml:mrow><mml:mi>p</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:msub><mml:mi>S</mml:mi><mml:mi>i</mml:mi></mml:msub><mml:mo mathsize="40%">|</mml:mo><mml:msub><mml:mi>θ</mml:mi><mml:mi>i</mml:mi></mml:msub><mml:mo>,</mml:mo><mml:mi>a</mml:mi><mml:mo>,</mml:mo><mml:mi>b</mml:mi><mml:mo>,</mml:mo><mml:msub><mml:mi>x</mml:mi><mml:mi>i</mml:mi></mml:msub><mml:mo>,</mml:mo><mml:mi mathvariant="script">R</mml:mi><mml:mo stretchy="false">)</mml:mo><mml:mo>=</mml:mo><mml:munderover><mml:mstyle mathsize="95%" displaystyle="true"><mml:mo>∏</mml:mo></mml:mstyle><mml:mrow><mml:mi>t</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:msub><mml:mi>T</mml:mi><mml:mi>i</mml:mi></mml:msub><mml:mo>−</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:munderover><mml:mi>p</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:msub><mml:mi>S</mml:mi><mml:mrow><mml:mi>i</mml:mi><mml:mo>,</mml:mo><mml:mi>t</mml:mi><mml:mo>+</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:msub><mml:mo mathsize="40%">|</mml:mo><mml:msub><mml:mi>S</mml:mi><mml:mrow><mml:mi>i</mml:mi><mml:mo>,</mml:mo><mml:mi>t</mml:mi></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>θ</mml:mi><mml:mi>i</mml:mi></mml:msub><mml:mo>,</mml:mo><mml:mi>a</mml:mi><mml:mo>,</mml:mo><mml:mi>b</mml:mi><mml:mo>,</mml:mo><mml:msub><mml:mi>x</mml:mi><mml:mi>i</mml:mi></mml:msub><mml:mo>,</mml:mo><mml:mi mathvariant="script">R</mml:mi><mml:mo stretchy="false">)</mml:mo><mml:mo>.</mml:mo><mml:mtext> </mml:mtext></mml:mrow></mml:math><label>5</label></disp-formula>
	<p>The joint posterior distribution is:</p><disp-formula id="e">	<mml:math id="m49"><mml:mrow><mml:mi>p</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:mi>θ</mml:mi><mml:mo>,</mml:mo><mml:mi>a</mml:mi><mml:mo>,</mml:mo><mml:mi>b</mml:mi><mml:mo>|</mml:mo><mml:mi>S</mml:mi><mml:mo>,</mml:mo><mml:mi>x</mml:mi><mml:mo>,</mml:mo><mml:mi mathvariant="script">R</mml:mi><mml:mo stretchy="false">)</mml:mo><mml:mo>∝</mml:mo><mml:mi>p</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:mi>S</mml:mi><mml:mo>|</mml:mo><mml:mi>θ</mml:mi><mml:mo>,</mml:mo><mml:mi>a</mml:mi><mml:mo>,</mml:mo><mml:mi>b</mml:mi><mml:mo>,</mml:mo><mml:mi>x</mml:mi><mml:mo>,</mml:mo><mml:mi mathvariant="script">R</mml:mi><mml:mo stretchy="false">)</mml:mo><mml:mi>p</mml:mi><mml:mfenced><mml:mrow><mml:mi>θ</mml:mi><mml:mo>,</mml:mo><mml:mi>a</mml:mi><mml:mo>,</mml:mo><mml:mi>b</mml:mi></mml:mrow></mml:mfenced></mml:mrow></mml:math></disp-formula><disp-formula id="e___1">	<mml:math id="m50"><mml:mrow><mml:mo>=</mml:mo><mml:munderover><mml:mstyle mathsize="95%" displaystyle="true"><mml:mo>∏</mml:mo></mml:mstyle><mml:mrow><mml:mi>i</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mi>n</mml:mi></mml:munderover><mml:munderover><mml:mstyle mathsize="95%" displaystyle="true"><mml:mo>∏</mml:mo></mml:mstyle><mml:mrow><mml:mi>t</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:msub><mml:mi>T</mml:mi><mml:mi>i</mml:mi></mml:msub><mml:mo>−</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:munderover><mml:mi>p</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:msub><mml:mi>S</mml:mi><mml:mrow><mml:mi>i</mml:mi><mml:mo>,</mml:mo><mml:mi>t</mml:mi><mml:mo>+</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:msub><mml:mo 
mathsize="40%">|</mml:mo><mml:msub><mml:mi>S</mml:mi><mml:mrow><mml:mi>i</mml:mi><mml:mo>,</mml:mo><mml:mi>t</mml:mi></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>θ</mml:mi><mml:mi>i</mml:mi></mml:msub><mml:mo>,</mml:mo><mml:mi>a</mml:mi><mml:mo>,</mml:mo><mml:mi>b</mml:mi><mml:mo>,</mml:mo><mml:msub><mml:mi>x</mml:mi><mml:mi>i</mml:mi></mml:msub><mml:mo>,</mml:mo><mml:mi mathvariant="script">R</mml:mi><mml:mo stretchy="false">)</mml:mo><mml:mi>p</mml:mi><mml:mfenced><mml:mrow><mml:msub><mml:mi>θ</mml:mi><mml:mi>i</mml:mi></mml:msub></mml:mrow></mml:mfenced><mml:mi>p</mml:mi><mml:mfenced><mml:mi>a</mml:mi></mml:mfenced><mml:mi>p</mml:mi><mml:mfenced><mml:mi>b</mml:mi></mml:mfenced></mml:mrow></mml:math></disp-formula><disp-formula id="e6"><mml:math id="m51"><mml:mrow><mml:mo>=</mml:mo><mml:munderover><mml:mstyle mathsize="95%" displaystyle="true"><mml:mo>∏</mml:mo></mml:mstyle><mml:mrow><mml:mi>i</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mi>n</mml:mi></mml:munderover><mml:munderover><mml:mstyle mathsize="95%" displaystyle="true"><mml:mo>∏</mml:mo></mml:mstyle><mml:mrow><mml:mi>t</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:msub><mml:mi>T</mml:mi><mml:mi>i</mml:mi></mml:msub><mml:mo>−</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:munderover><mml:mi>p</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:msub><mml:mi>S</mml:mi><mml:mrow><mml:mi>i</mml:mi><mml:mo>,</mml:mo><mml:mi>t</mml:mi><mml:mo>+</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:msub><mml:mo mathsize="40%">|</mml:mo><mml:msub><mml:mi>S</mml:mi><mml:mrow><mml:mi>i</mml:mi><mml:mo>,</mml:mo><mml:mi>t</mml:mi></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>θ</mml:mi><mml:mi>i</mml:mi></mml:msub><mml:mo>,</mml:mo><mml:mi>a</mml:mi><mml:mo>,</mml:mo><mml:mi>b</mml:mi><mml:mo>,</mml:mo><mml:msub><mml:mi>x</mml:mi><mml:mi>i</mml:mi></mml:msub><mml:mo>,</mml:mo><mml:mi mathvariant="script">R</mml:mi><mml:mo 
stretchy="false">)</mml:mo><mml:mi>p</mml:mi><mml:mfenced><mml:mrow><mml:msub><mml:mi>θ</mml:mi><mml:mi>i</mml:mi></mml:msub><mml:mo>|</mml:mo><mml:msub><mml:mi>x</mml:mi><mml:mi>i</mml:mi></mml:msub><mml:mo>,</mml:mo><mml:mi>μ</mml:mi><mml:mo>,</mml:mo><mml:mi>σ</mml:mi></mml:mrow></mml:mfenced><mml:mi>p</mml:mi><mml:mfenced><mml:mi>μ</mml:mi></mml:mfenced><mml:mi>p</mml:mi><mml:mfenced><mml:mi>σ</mml:mi></mml:mfenced><mml:mi>p</mml:mi><mml:mfenced><mml:mi>a</mml:mi></mml:mfenced><mml:mi>p</mml:mi><mml:mfenced><mml:mrow><mml:mi>b</mml:mi><mml:mo>|</mml:mo><mml:mi>λ</mml:mi><mml:mo>,</mml:mo><mml:mi>τ</mml:mi></mml:mrow></mml:mfenced><mml:mi>p</mml:mi><mml:mfenced><mml:mi>λ</mml:mi></mml:mfenced><mml:mi>p</mml:mi><mml:mfenced><mml:mi>τ</mml:mi></mml:mfenced><mml:mo>.</mml:mo><mml:mtext> </mml:mtext></mml:mrow></mml:math><label>6</label></disp-formula>
	<p>The model extends the standard SRM assumptions to accommodate group differences. For ability parameters, the SRM-C allows group-specific distributions <inline-formula><mml:math id="m52"><mml:mrow><mml:msub><mml:mi>θ</mml:mi><mml:mi>i</mml:mi></mml:msub><mml:mo>|</mml:mo><mml:msub><mml:mi>x</mml:mi><mml:mi>i</mml:mi></mml:msub><mml:mo>~</mml:mo><mml:mi>N</mml:mi><mml:mfenced><mml:mrow><mml:mi>μ</mml:mi><mml:msub><mml:mi>x</mml:mi><mml:mi>i</mml:mi></mml:msub><mml:mo>,</mml:mo><mml:msup><mml:mi>σ</mml:mi><mml:mrow><mml:mn>2</mml:mn><mml:msub><mml:mi>x</mml:mi><mml:mi>i</mml:mi></mml:msub></mml:mrow></mml:msup></mml:mrow></mml:mfenced></mml:mrow></mml:math></inline-formula> and maintains normal priors for state transition parameters <inline-formula><mml:math id="m53"><mml:mrow><mml:msub><mml:mi>a</mml:mi><mml:mrow><mml:mi>j</mml:mi><mml:mo>,</mml:mo><mml:mi>k</mml:mi></mml:mrow></mml:msub><mml:mo>~</mml:mo><mml:mi>N</mml:mi><mml:mfenced><mml:mrow><mml:mn>0</mml:mn><mml:mo>,</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:mfenced></mml:mrow></mml:math></inline-formula>, with <inline-formula><mml:math id="m54"><mml:mrow><mml:mi>μ</mml:mi><mml:mo>~</mml:mo><mml:mi>N</mml:mi><mml:mfenced><mml:mrow><mml:mn>0</mml:mn><mml:mo>,</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:mfenced></mml:mrow></mml:math></inline-formula> and <inline-formula><mml:math id="m55"><mml:mrow><mml:msup><mml:mi>σ</mml:mi><mml:mn>2</mml:mn></mml:msup><mml:mo>~</mml:mo><mml:mtext>Inv</mml:mtext></mml:mrow></mml:math></inline-formula>-<inline-formula><mml:math id="m56"><mml:mrow><mml:mtext>Gamma</mml:mtext><mml:mfenced><mml:mrow><mml:mn>1</mml:mn><mml:mo>,</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:mfenced></mml:mrow></mml:math></inline-formula>. 
The key methodological innovation lies in the DIF parameter specification, where we adopt the horseshoe prior for the covariate coefficients: <inline-formula><mml:math id="m57"><mml:mrow><mml:msub><mml:mi>b</mml:mi><mml:mrow><mml:mi>j</mml:mi><mml:mo>,</mml:mo><mml:mi>k</mml:mi></mml:mrow></mml:msub><mml:mo>~</mml:mo><mml:mi>N</mml:mi><mml:mfenced><mml:mrow><mml:mn>0</mml:mn><mml:mo>,</mml:mo><mml:msubsup><mml:mi>λ</mml:mi><mml:mrow><mml:mi>j</mml:mi><mml:mo>,</mml:mo><mml:mi>k</mml:mi></mml:mrow><mml:mn>2</mml:mn></mml:msubsup><mml:mo mathsize="60%">∙</mml:mo><mml:msup><mml:mi>τ</mml:mi><mml:mn>2</mml:mn></mml:msup></mml:mrow></mml:mfenced><mml:mo>,</mml:mo></mml:mrow></mml:math></inline-formula> <inline-formula><mml:math id="m58"><mml:mrow><mml:msub><mml:mi>λ</mml:mi><mml:mrow><mml:mi>j</mml:mi><mml:mo>,</mml:mo><mml:mi>k</mml:mi></mml:mrow></mml:msub><mml:mo>~</mml:mo><mml:mtext>half</mml:mtext></mml:mrow></mml:math></inline-formula>-<inline-formula><mml:math id="m59"><mml:mrow><mml:mtext>Cauchy</mml:mtext><mml:mfenced><mml:mrow><mml:mn>0</mml:mn><mml:mo>,</mml:mo><mml:mo> </mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:mfenced><mml:mo>,</mml:mo></mml:mrow></mml:math></inline-formula> <inline-formula><mml:math id="m60"><mml:mrow><mml:msub><mml:mi>s</mml:mi><mml:mi>j</mml:mi></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>s</mml:mi><mml:mi>k</mml:mi></mml:msub><mml:mo>∈</mml:mo><mml:mi>s</mml:mi></mml:mrow></mml:math></inline-formula>. 
Following <xref ref-type="bibr" rid="r46">Piironen and Vehtari (2017)</xref>, the effective number of non-zero coefficients <inline-formula><mml:math id="m61"><mml:mrow><mml:msub><mml:mi>m</mml:mi><mml:mrow><mml:mtext>eff</mml:mtext></mml:mrow></mml:msub></mml:mrow></mml:math></inline-formula> can be approximated as</p><disp-formula id="e7"><mml:math id="m62"><mml:mrow><mml:mi>E</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:msub><mml:mi>m</mml:mi><mml:mrow><mml:mtext>eff</mml:mtext></mml:mrow></mml:msub><mml:mo mathsize="40%">|</mml:mo><mml:mi>τ</mml:mi><mml:mo>,</mml:mo><mml:mi>σ</mml:mi><mml:mo stretchy="false">)</mml:mo><mml:mo>=</mml:mo><mml:mfrac><mml:mrow><mml:mi>τ</mml:mi><mml:msup><mml:mi>σ</mml:mi><mml:mrow><mml:mo>−</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:msup><mml:msqrt><mml:mi>n</mml:mi></mml:msqrt></mml:mrow><mml:mrow><mml:mn>1</mml:mn><mml:mo>+</mml:mo><mml:mi>τ</mml:mi><mml:msup><mml:mi>σ</mml:mi><mml:mrow><mml:mo>−</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:msup><mml:msqrt><mml:mi>n</mml:mi></mml:msqrt></mml:mrow></mml:mfrac><mml:mo mathsize="60%">∙</mml:mo><mml:mi>D</mml:mi><mml:mo>,</mml:mo><mml:mtext> </mml:mtext></mml:mrow></mml:math><label>7</label></disp-formula>
<p>where <inline-formula><mml:math id="m63"><mml:mi>τ</mml:mi></mml:math></inline-formula> represents the global shrinkage parameter, <inline-formula><mml:math id="m64"><mml:mi>σ</mml:mi></mml:math></inline-formula> denotes the noise standard deviation, <inline-formula><mml:math id="m65"><mml:mi>n</mml:mi></mml:math></inline-formula> is the total sample size, and <inline-formula><mml:math id="m66"><mml:mi>D</mml:mi></mml:math></inline-formula> indicates the number of state transitions. For our simulation conditions with <inline-formula><mml:math id="m67"><mml:mrow><mml:mi>τ</mml:mi><mml:mo>=</mml:mo><mml:mn>0.2</mml:mn><mml:mo>,</mml:mo><mml:mo> </mml:mo><mml:mi>σ</mml:mi><mml:mo>=</mml:mo><mml:mn>2</mml:mn></mml:mrow></mml:math></inline-formula>, and <inline-formula><mml:math id="m68"><mml:mrow><mml:mi>D</mml:mi><mml:mo>=</mml:mo><mml:mn>18</mml:mn></mml:mrow></mml:math></inline-formula>, this corresponds to expected DIF proportions of approximately 76% (<inline-formula><mml:math id="m69"><mml:mrow><mml:mi>n</mml:mi><mml:mo>=</mml:mo><mml:mn>1000</mml:mn></mml:mrow></mml:math></inline-formula>) and 82% (<inline-formula><mml:math id="m70"><mml:mrow><mml:mi>n</mml:mi><mml:mo>=</mml:mo><mml:mn>2000</mml:mn></mml:mrow></mml:math></inline-formula>). This choice reflects a conservative approach prioritizing the detection of measurement bias over model parsimony. In DIF detection, false negatives pose greater threats to educational equity than false positives, as undetected bias can perpetuate unfair assessment practices. Overly aggressive sparsity assumptions risk shrinking genuine DIF effects toward zero, potentially masking critical fairness violations. 
In addition, the horseshoe prior’s hierarchical structure mitigates these concerns through its dual-layer shrinkage mechanism: while the global parameter <inline-formula><mml:math id="m71"><mml:mi>τ</mml:mi></mml:math></inline-formula> establishes a lenient framework, the local parameters <inline-formula><mml:math id="m72"><mml:mrow><mml:msub><mml:mi>λ</mml:mi><mml:mrow><mml:mi>j</mml:mi><mml:mo>,</mml:mo><mml:mi>k</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:math></inline-formula> provide adaptive regularization based on empirical evidence. This design preserves the prior’s ability to distinguish signal from noise while reducing the risk of overlooking substantive bias effects.</p>
	<p>Parameters are estimated using Markov chain Monte Carlo with a Gibbs sampler incorporating Metropolis-Hastings steps (<xref ref-type="bibr" rid="r21">Hastings, 1970</xref>). Detailed estimation procedures are provided in Supplemental Materials Section A (see <xref ref-type="bibr" rid="r18">Han et al., 2026</xref>).</p></sec></sec></sec>
<sec sec-type="other2"><title>Simulation Study</title>
<p>A Monte Carlo simulation study was conducted to evaluate three aspects of the SRM-C: parameter recovery capabilities, relative performance compared to the SRM, and efficacy in detecting DIF in response processes.</p>
<sec><title>Design</title>
	<p>The simulation design followed a state transition framework with eight states (<xref ref-type="bibr" rid="r19">Han et al., 2022</xref>; see Supplemental Materials Section B for more details, in <xref ref-type="bibr" rid="r18">Han et al., 2026</xref>).</p>
<p>The study employed a factorial design with four manipulated factors:</p>
<list id="L1" list-type="order">
<list-item>
<p><italic>Sample size</italic> (<italic>n</italic>): 1000, 2000, with equal group sizes (i.e., 500 and 1000 per group, respectively).</p></list-item>
<list-item>
	<p><italic>Sequence length</italic>: Short (≤ 10 transitions), Medium (~20 transitions) and Long (~40 transitions). Sequence length was controlled by transition parameters, with larger values for transitions returning to states distant from the target producing longer sequences (see Table B2 in the Supplemental Materials in <xref ref-type="bibr" rid="r18">Han et al., 2026</xref>).</p></list-item>
<list-item>
	<p><italic>Group difference</italic>: Abilities were drawn from <inline-formula><mml:math id="m73"><mml:mrow><mml:mi>N</mml:mi><mml:mfenced><mml:mrow><mml:mn>0</mml:mn><mml:mo>,</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:mfenced></mml:mrow></mml:math></inline-formula> for both groups in the no-difference condition, and from <inline-formula><mml:math id="m74"><mml:mrow><mml:mi>N</mml:mi><mml:mfenced><mml:mrow><mml:mn>0</mml:mn><mml:mo>,</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:mfenced></mml:mrow></mml:math></inline-formula> for the reference group and <inline-formula><mml:math id="m75"><mml:mrow><mml:mi>N</mml:mi><mml:mfenced><mml:mrow><mml:mn>1</mml:mn><mml:mo>,</mml:mo><mml:msup><mml:mrow><mml:mn>1.5</mml:mn></mml:mrow><mml:mn>2</mml:mn></mml:msup></mml:mrow></mml:mfenced></mml:mrow></mml:math></inline-formula> for the focal group in the difference condition.</p></list-item>
<list-item>
	<p><italic>DIF Pattern</italic>: Three conditions were examined: DIF-free where all covariate coefficients (<inline-formula><mml:math id="m76"><mml:mrow><mml:msub><mml:mi>b</mml:mi><mml:mrow><mml:mi>j</mml:mi><mml:mo>,</mml:mo><mml:mi>k</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:math></inline-formula>) were set to 0, balanced DIF where the focal group showed higher probability for the correct transition E→F but lower probability for correct transition C→D, and unbalanced DIF where the focal group showed lower probabilities for both correct transitions (C→D and E→F). The specific coefficient values are shown in <xref ref-type="table" rid="t1">Table 1</xref>.</p></list-item>
</list>
<table-wrap id="t1" position="anchor" orientation="portrait">
<label>Table 1</label><caption><title>True Values of Covariate Coefficients by DIF Pattern</title></caption>
<table frame="hsides" rules="groups">
<col width="15%" align="left"/>
<col width=""/>
<col width=""/>
<col width=""/>
<col width=""/>
<col width=""/>
<col width=""/>
<thead>
<tr>
<th>DIF Pattern</th>
<th>	<inline-formula><mml:math id="m77"><mml:mrow><mml:msub><mml:mi>b</mml:mi><mml:mrow><mml:mi>C</mml:mi><mml:mi>A</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:math></inline-formula></th>
<th>	<inline-formula><mml:math id="m78"><mml:mrow><mml:msub><mml:mi>b</mml:mi><mml:mrow><mml:mi>C</mml:mi><mml:mi>B</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:math></inline-formula></th>
<th>	<inline-formula><mml:math id="m79"><mml:mrow><mml:msub><mml:mi>b</mml:mi><mml:mrow><mml:mi>C</mml:mi><mml:mi>D</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:math></inline-formula></th>
<th>	<inline-formula><mml:math id="m80"><mml:mrow><mml:msub><mml:mi>b</mml:mi><mml:mrow><mml:mi>E</mml:mi><mml:mi>C</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:math></inline-formula></th>
<th>	<inline-formula><mml:math id="m81"><mml:mrow><mml:msub><mml:mi>b</mml:mi><mml:mrow><mml:mi>E</mml:mi><mml:mi>F</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:math></inline-formula></th>
<th>	<inline-formula><mml:math id="m82"><mml:mrow><mml:msub><mml:mi>b</mml:mi><mml:mrow><mml:mi>E</mml:mi><mml:mo>#</mml:mo></mml:mrow></mml:msub></mml:mrow></mml:math></inline-formula></th>
</tr>
</thead>
<tbody>
<tr>
<td>Balanced</td>
<td align="char" char=".">0.4</td>
<td align="char" char=".">0.6</td>
<td align="char" char=".">-1.0</td>
<td align="char" char=".">-1.0</td>
<td align="char" char=".">1.0</td>
<td>0</td>
</tr>
<tr>
<td>Unbalanced</td>
<td align="char" char=".">0.4</td>
<td align="char" char=".">0.6</td>
<td align="char" char=".">-1.0</td>
<td>0</td>
<td align="char" char=".">-1.0</td>
<td align="char" char=".">1.0</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<p><italic>Note.</italic> For state C, only C→D is correct. For state E, only E→F is correct.</p>
</table-wrap-foot>
</table-wrap>
<p>The simulation employed a 2 (sample size) × 3 (sequence length) × 2 (group difference) × 3 (DIF pattern) factorial design, yielding 36 conditions. Each condition was replicated 100 times with constant transition parameters and coefficients. Groups were balanced with equal numbers of examinees. Abilities were randomly generated per replication based on the group difference condition.</p>
<sec><title>Parameter Estimation</title>
	<p>Parameters were estimated using a custom Bayesian sampler in R (<xref ref-type="bibr" rid="r48">R Core Team, 2018</xref>), following the procedure described in Section A of the Supplemental Materials (see <xref ref-type="bibr" rid="r18">Han et al., 2026</xref>). Each of two chains ran for 15,000 iterations, with 10,000 burn-in and thinning by 10. Random initial values were used, and estimation was restarted with new initial values if the potential scale reduction factor (PSRF) exceeded 1.2 (<xref ref-type="bibr" rid="r14">Gelman &amp; Rubin, 1992</xref>). The sampler code is available at <xref ref-type="bibr" rid="r17">Han and Ji (2025)</xref>.</p>
	<p>To assess robustness to prior specification, we conducted sensitivity analyses using <inline-formula><mml:math id="m83"><mml:mrow><mml:mi>τ</mml:mi><mml:mo>∈</mml:mo><mml:mfenced close="}" open="{"><mml:mrow><mml:mn>0.05</mml:mn><mml:mo>,</mml:mo><mml:mn>0.1</mml:mn><mml:mo>,</mml:mo><mml:mn>0.2</mml:mn></mml:mrow></mml:mfenced></mml:mrow></mml:math></inline-formula> for a representative subset of conditions (<inline-formula><mml:math id="m84"><mml:mrow><mml:mi>n</mml:mi><mml:mo>=</mml:mo><mml:mn>2000</mml:mn></mml:mrow></mml:math></inline-formula>, medium sequence, with group difference and unbalanced DIF). Results remained consistent across this range, supporting our choice of <inline-formula><mml:math id="m85"><mml:mrow><mml:mi>τ</mml:mi><mml:mo>=</mml:mo><mml:mn>0.2</mml:mn></mml:mrow></mml:math></inline-formula> for balancing DIF detection sensitivity with error control. Complete sensitivity analyses are reported in Supplemental Materials Section D (see <xref ref-type="bibr" rid="r18">Han et al., 2026</xref>).</p></sec>
<sec><title>Results of the Simulation Study</title>
<sec><title>Convergence and Model Fit Comparison</title>
	<p>All parameters converged with PSRF less than 1.2 (<xref ref-type="bibr" rid="r14">Gelman &amp; Rubin, 1992</xref>). Trace plots (Figure B1 in the Supplemental Materials in <xref ref-type="bibr" rid="r18">Han et al., 2026</xref>) from an exemplar condition demonstrated convergence to the same posterior distributions despite different initial values. Similar convergence patterns were observed across all conditions.</p>
<p>Model fit was compared using the deviance information criterion (DIC; <xref ref-type="bibr" rid="r56">Spiegelhalter et al., 2002</xref>) and double log of pseudo Bayes factor (2log(PsBF<sub>21</sub>); <xref ref-type="bibr" rid="r27">Levy &amp; Mislevy, 2016</xref>). A lower DIC indicates better fit, and 2log(PsBF<sub>21</sub>) greater than 0 favors the SRM-C over the SRM, with values exceeding 10 suggesting strong evidence (<xref ref-type="bibr" rid="r25">Kass &amp; Raftery, 1995</xref>).</p>
	<p>Figure B3 in Supplemental Materials (see <xref ref-type="bibr" rid="r18">Han et al., 2026</xref>) presents detailed fit indices across conditions from 100 replications. The SRM-C showed consistently better fit (lower DIC and positive 2log(PsBF<sub>21</sub>)) in conditions with DIF or ability differences between groups. This advantage increased with larger sample sizes and longer sequences. The two models showed comparable fit only in conditions with no DIF and no ability differences.</p></sec>
<sec><title>Estimation Accuracy</title>
	<p>Parameter recovery was evaluated using Root Mean Squared Error (RMSE). For ability parameters, posterior estimates were averaged across identical response patterns to account for MCMC sampling variability. <xref ref-type="fig" rid="f3">Figure 3</xref> presents RMSE values for the ability estimates, transition parameters, and covariate coefficients, while <xref ref-type="fig" rid="f4">Figure 4</xref> displays the estimated ability distributions for the focal group. Detailed numerical results, including bias statistics, correlations between estimated and true values, as well as RMSE and average occurrence frequency for individual covariate coefficients across conditions, are provided in Supplemental Materials Section B (see <xref ref-type="bibr" rid="r18">Han et al., 2026</xref>).</p><fig id="f3" position="anchor" fig-type="figure" orientation="portrait"><label>Figure 3</label><caption>
<title>RMSE Values for Ability Estimates (<inline-formula><mml:math id="m86"><mml:mi>θ</mml:mi></mml:math></inline-formula>), Transition Parameters (<inline-formula><mml:math id="m87"><mml:mi>a</mml:mi></mml:math></inline-formula>), and Covariate Coefficients (<inline-formula><mml:math id="m88"><mml:mi>b</mml:mi></mml:math></inline-formula>)</title></caption><graphic xlink:href="meth.16999-f3" position="anchor" orientation="portrait"/></fig><fig id="f4" position="anchor" fig-type="figure" orientation="portrait"><label>Figure 4</label><caption>
<title>Mean Estimated Values of Target Group Ability Distribution Parameters: Mean (<inline-formula><mml:math id="m89"><mml:mi>μ</mml:mi></mml:math></inline-formula>) and Standard Deviation (<inline-formula><mml:math id="m90"><mml:mi>σ</mml:mi></mml:math></inline-formula>)</title></caption><graphic xlink:href="meth.16999-f4" position="anchor" orientation="portrait"/></fig>
<p>The SRM-C demonstrated robust parameter recovery across all conditions. RMSE values were below .35 for ability estimates, below .15 for transition parameters, and below .20 for covariate coefficients. Focal group distribution parameters were accurately recovered in most conditions, with mean and standard deviation estimates close to true values (<inline-formula><mml:math id="m91"><mml:mi>μ</mml:mi></mml:math></inline-formula> = 0, <inline-formula><mml:math id="m92"><mml:mrow><mml:mi>σ</mml:mi><mml:mo> </mml:mo></mml:mrow></mml:math></inline-formula>= 1 for no difference; <inline-formula><mml:math id="m93"><mml:mi>μ</mml:mi></mml:math></inline-formula> = 1, <inline-formula><mml:math id="m94"><mml:mrow><mml:mi>σ</mml:mi><mml:mo> </mml:mo></mml:mrow></mml:math></inline-formula>= 1.5 for with difference). The only exception occurred in unbalanced DIF conditions with short sequences (average length = 7) and group differences, where estimates were slightly biased (<inline-formula><mml:math id="m95"><mml:mi>n</mml:mi></mml:math></inline-formula> = 1000: <inline-formula><mml:math id="m96"><mml:mrow><mml:mover accent="true"><mml:mover accent="true"><mml:mi>μ</mml:mi><mml:mo>^</mml:mo></mml:mover><mml:mo stretchy="true">¯</mml:mo></mml:mover></mml:mrow></mml:math></inline-formula> = 0.76, <inline-formula><mml:math id="m97"><mml:mrow><mml:mover accent="true"><mml:mover accent="true"><mml:mi>σ</mml:mi><mml:mo>^</mml:mo></mml:mover><mml:mo stretchy="true">¯</mml:mo></mml:mover></mml:mrow></mml:math></inline-formula> = 1.38; <inline-formula><mml:math id="m98"><mml:mi>n</mml:mi></mml:math></inline-formula> = 2000: <inline-formula><mml:math id="m99"><mml:mrow><mml:mover accent="true"><mml:mover accent="true"><mml:mi>μ</mml:mi><mml:mo>^</mml:mo></mml:mover><mml:mo stretchy="true">¯</mml:mo></mml:mover></mml:mrow></mml:math></inline-formula> = 0.85, <inline-formula><mml:math id="m100"><mml:mrow><mml:mover accent="true"><mml:mover 
accent="true"><mml:mi>σ</mml:mi><mml:mo>^</mml:mo></mml:mover><mml:mo stretchy="true">¯</mml:mo></mml:mover></mml:mrow></mml:math></inline-formula> = 1.42).</p>
<p>As expected, estimation accuracy improved with larger sample sizes and longer sequences. With <inline-formula><mml:math id="m101"><mml:mrow><mml:mi>n</mml:mi><mml:mo>=</mml:mo><mml:mn>1000</mml:mn></mml:mrow></mml:math></inline-formula> (500 per group), parameter recovery was adequate under most conditions, but showed some deterioration with short sequences, particularly in complex scenarios involving both group differences and unbalanced DIF. For instance, RMSE values for ability estimates approached 0.35 under these challenging conditions, suggesting that smaller sample sizes may compromise estimation accuracy when combined with limited process information. When <inline-formula><mml:math id="m102"><mml:mi>n</mml:mi></mml:math></inline-formula> &gt; 1000 and sequences were not short, RMSE remained below .1 for transition parameters <inline-formula><mml:math id="m103"><mml:mi>a</mml:mi></mml:math></inline-formula> and below .12 for covariate coefficients <inline-formula><mml:math id="m104"><mml:mi>b</mml:mi></mml:math></inline-formula>. Additionally, group differences had minimal impact on estimation accuracy, particularly with longer sequences and larger samples.</p>
<p>The SRM showed comparable performance to the SRM-C only under DIF-free conditions without group differences. With DIF or group differences present, the SRM’s accuracy deteriorated substantially, with the performance gap widening as sequence length and sample size increased. These results suggest the SRM-C should be preferred when sample heterogeneity exists, while serving as a viable alternative to the SRM when group abilities are homogeneous.</p></sec>
<sec><title>Type I Error Rate and Statistical Power Using the SRM-C to Assess DIF</title>
	<p>DIF detection was based on 95% highest posterior density (HPD) intervals for <inline-formula><mml:math id="m105"><mml:mrow><mml:msub><mml:mi>b</mml:mi><mml:mrow><mml:mi>j</mml:mi><mml:mo>,</mml:mo><mml:mi>k</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:math></inline-formula> parameters, with significance determined by whether intervals contained zero. Type I error rate was defined as the proportion of truly zero coefficients incorrectly identified as significant, while power was the proportion of truly non-zero coefficients correctly identified as significant. Results across replications are summarized in <xref ref-type="table" rid="t2">Tables 2</xref> and <xref ref-type="table" rid="t3">3</xref>.</p>
<table-wrap id="t2" position="anchor" orientation="portrait">
<label>Table 2</label><caption><title>Type I Error Rates (%) for DIF Detection</title></caption>
<table frame="hsides" rules="groups">
<col width="" align="left"/>
<col width=""/>
<col width=""/>
<col width=""/>
<col width=""/>
<col width=""/>
<thead>
<tr>
<th/>
<th/>
<th/>
<th colspan="3" scope="colgroup">DIF Pattern<hr/></th>
</tr>
<tr>
<th valign="bottom">Group Difference</th>
<th valign="bottom">Sample Size</th>
<th valign="bottom">Sequence Length</th>	
<th valign="bottom">DIF-Free</th>
<th valign="bottom">Balanced</th>
<th valign="bottom">Unbalanced</th>
</tr>
</thead>
<tbody>
<tr>
<td>No</td>
<td>1000</td>
<td>Short</td>
<td>0</td>
<td align="char" char=".">0.00</td>
<td align="char" char=".">0.31</td>
</tr>
<tr>
<td/>
<td/>	
<td>Medium</td>
<td>0</td>
<td align="char" char=".">0.00</td>
<td align="char" char=".">0.08</td>
</tr>
<tr>
<td/>
<td/>	
<td>Long</td>
<td>0</td>
<td align="char" char=".">0.00</td>
<td>0</td>
</tr>
<tr>
<td/>	
<td>2000</td>
<td>Short</td>
<td>0</td>
<td align="char" char=".">0.00</td>
<td>0</td>
</tr>
<tr>
<td/>
<td/>	
<td>Medium</td>
<td>0</td>
<td align="char" char=".">0.00</td>
<td align="char" char=".">0.08</td>
</tr>
<tr>
<td/>
<td/>	
<td>Long</td>
<td align="char" char=".">0.06</td>
<td align="char" char=".">0.00</td>
<td>0</td>
</tr>
<tr style="grey-border-top">
<td>Yes</td>
<td>1000</td>
<td>Short</td>
<td>0</td>
<td align="char" char=".">0.08</td>
<td align="char" char=".">0.31</td>
</tr>
<tr>
<td/>
<td/>	
<td>Medium</td>
<td>0</td>
<td align="char" char=".">0.00</td>
<td align="char" char=".">0.08</td>
</tr>
<tr>
<td/>
<td/>	
<td>Long</td>
<td>0</td>
<td align="char" char=".">0.00</td>
<td align="char" char=".">0.08</td>
</tr>
<tr>
<td/>	
<td>2000</td>
<td>Short</td>
<td>0</td>
<td align="char" char=".">0.08</td>
<td align="char" char=".">0.15</td>
</tr>
<tr>
<td/>
<td/>	
<td>Medium</td>
<td>0</td>
<td align="char" char=".">0.08</td>
<td align="char" char=".">0.23</td>
</tr>
<tr>
<td/>
<td/>	
<td>Long</td>
<td align="char" char=".">0.06</td>
<td align="char" char=".">0.00</td>
<td>0</td>
</tr>
</tbody>
</table>
</table-wrap>
<table-wrap id="t3" position="anchor" orientation="portrait">
<label>Table 3</label><caption><title>Statistical Power (%) for DIF Detection</title></caption>
<table frame="hsides" rules="groups">
<col width="" align="left"/>
<col width=""/>
<col width=""/>
<col width=""/>
<col width=""/>
<col width=""/>
<col width=""/>
<col width=""/>
<col width=""/>
<col width=""/>
<thead>
<tr>
<th/>
<th/>
<th/>
<th/>
<th/>
<th colspan="5" scope="colgroup">Coefficients<hr/></th>
</tr>
<tr>
<th valign="bottom">DIF Pattern</th>
<th valign="bottom">Group Difference</th>
<th valign="bottom">Sample Size</th>
<th valign="bottom">Sequence Length</th>
<th valign="bottom">Average Power (%)</th>	
<th  valign="bottom">	<inline-formula><mml:math id="m106"><mml:mrow><mml:msub><mml:mi>b</mml:mi><mml:mrow><mml:mi>C</mml:mi><mml:mi>A</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:math></inline-formula></th>
<th valign="bottom">	<inline-formula><mml:math id="m107"><mml:mrow><mml:msub><mml:mi>b</mml:mi><mml:mrow><mml:mi>C</mml:mi><mml:mi>B</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:math></inline-formula></th>
<th valign="bottom">	<inline-formula><mml:math id="m108"><mml:mrow><mml:msub><mml:mi>b</mml:mi><mml:mrow><mml:mi>C</mml:mi><mml:mi>D</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:math></inline-formula></th>
<th valign="bottom">	<inline-formula><mml:math id="m109"><mml:mrow><mml:msub><mml:mi>b</mml:mi><mml:mrow><mml:mi>E</mml:mi><mml:mi>F</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:math></inline-formula></th>
	<th valign="bottom">	<inline-formula><mml:math id="m110"><mml:mrow><mml:msub><mml:mi>b</mml:mi><mml:mrow><mml:mi>E</mml:mi><mml:mi>C</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:math></inline-formula>/<inline-formula><mml:math id="m115"><mml:mrow><mml:msub><mml:mi>b</mml:mi><mml:mrow><mml:mi>E</mml:mi><mml:mo>#</mml:mo></mml:mrow></mml:msub></mml:mrow></mml:math></inline-formula></th>
</tr>
</thead>
<tbody>
<tr>
<td>Balanced</td>
<td>No</td>
<td>1000</td>
<td>Short</td>
<td align="char" char=".">85.2</td>
<td>49</td>
<td>97</td>
<td>100</td>
<td>95</td>
<td>85</td>
</tr>
<tr>
<td/>
<td/>
<td/>	
<td>Medium</td>
<td>100</td>
<td>100</td>
<td>100</td>
<td>100</td>
<td>100</td>
<td>100</td>
</tr>
<tr>
	<td/>
	<td/>
	<td/>	
<td>Long</td>
<td>100</td>
<td>100</td>
<td>100</td>
<td>100</td>
<td>100</td>
<td>100</td>
</tr>
<tr>
	<td/>
	<td/>
<td>2000</td>
<td>Short</td>
<td align="char" char=".">97.6</td>
<td>90</td>
<td>100</td>
<td>100</td>
<td>100</td>
<td>98</td>
</tr>
<tr>
	<td/>
	<td/>
	<td/>	
<td>Medium</td>
<td>100</td>
<td>100</td>
<td>100</td>
<td>100</td>
<td>100</td>
<td>100</td>
</tr>
<tr>
	<td/>
	<td/>
	<td/>	
<td>Long</td>
<td>100</td>
<td>100</td>
<td>100</td>
<td>100</td>
<td>100</td>
<td>100</td>
</tr>
<tr>
<td/>	
<td>Yes</td>
<td>1000</td>
<td>Short</td>
<td align="char" char=".">65.8</td>
<td>29</td>
<td>87</td>
<td>98</td>
<td>74</td>
<td>41</td>
</tr>
<tr>
	<td/>
	<td/>
	<td/>	
<td>Medium</td>
<td>100</td>
<td>100</td>
<td>100</td>
<td>100</td>
<td>100</td>
<td>100</td>
</tr>
<tr>
	<td/>
	<td/>
	<td/>	
<td>Long</td>
<td>100</td>
<td>100</td>
<td>100</td>
<td>100</td>
<td>100</td>
<td>100</td>
</tr>
<tr>
<td/>	
<td/>	
<td>2000</td>
<td>Short</td>
<td align="char" char=".">93.6</td>
<td>80</td>
<td>99</td>
<td>100</td>
<td>99</td>
<td>90</td>
</tr>
<tr>
	<td/>
	<td/>
	<td/>	
<td>Medium</td>
<td>100</td>
<td>100</td>
<td>100</td>
<td>100</td>
<td>100</td>
<td>100</td>
</tr>
<tr>
	<td/>
	<td/>
	<td/>	
<td>Long</td>
<td>100</td>
<td>100</td>
<td>100</td>
<td>100</td>
<td>100</td>
<td>100</td>
</tr>
	<tr style="grey-border-top"><?pagebreak-before?>
<td/>
<td/>
<td/>
<td/>
<td/>
</tr>
<tr>
<td>Unbalanced</td>	
<td>No</td>
<td>1000</td>
<td>Short</td>
<td align="char" char=".">75.6</td>
<td>19</td>
<td>81</td>
<td>95</td>
<td>88</td>
<td>95</td>
</tr>
<tr>
	<td/>
	<td/>
	<td/>	
<td>Medium</td>
<td align="char" char=".">98.8</td>
<td>97</td>
<td>100</td>
<td>100</td>
<td>97</td>
<td>100</td>
</tr>
<tr>
	<td/>
	<td/>
	<td/>	
<td>Long</td>
<td>100</td>
<td>100</td>
<td>100</td>
<td>100</td>
<td>100</td>
<td>100</td>
</tr>
<tr>
<td/>
<td/>	
<td>2000</td>
<td>Short</td>
<td align="char" char=".">95.8</td>
<td>79</td>
<td>100</td>
<td>100</td>
<td>100</td>
<td>100</td>
</tr>
<tr>
	<td/>
	<td/>
	<td/>	
<td>Medium</td>
<td>100</td>
<td>100</td>
<td>100</td>
<td>100</td>
<td>100</td>
<td>100</td>
</tr>
<tr>
	<td/>
	<td/>
	<td/>	
<td>Long</td>
<td>100</td>
<td>100</td>
<td>100</td>
<td>100</td>
<td>100</td>
<td>100</td>
</tr>
<tr>
<td/>	
<td>Yes</td>
<td>1000</td>
<td>Short</td>
<td align="char" char=".">49.6</td>
<td>8</td>
<td>53</td>
<td>72</td>
<td>43</td>
<td>72</td>
</tr>
<tr>
	<td/>
	<td/>
	<td/>	
<td>Medium</td>
<td align="char" char=".">98.8</td>
<td>96</td>
<td>100</td>
<td>100</td>
<td>98</td>
<td>100</td>
</tr>
<tr>
	<td/>
	<td/>
	<td/>	
<td>Long</td>
<td>100</td>
<td>100</td>
<td>100</td>
<td>100</td>
<td>100</td>
<td>100</td>
</tr>
<tr>
	<td/>
	<td/>
<td>2000</td>
<td>Short</td>
<td align="char" char=".">87.6</td>
<td>46</td>
<td>95</td>
<td>98</td>
<td>99</td>
<td>100</td>
</tr>
<tr>
	<td/>
	<td/>
	<td/>	
<td>Medium</td>
<td>100</td>
<td>100</td>
<td>100</td>
<td>100</td>
<td>100</td>
<td>100</td>
</tr>
<tr>
	<td/>
	<td/>
	<td/>	
<td>Long</td>
<td>100</td>
<td>100</td>
<td>100</td>
<td>100</td>
<td>100</td>
<td>100</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>Type I error rates remained well-controlled (below 0.5%) across all conditions. In DIF-free conditions, error rates were predominantly 0.00%, with only a slight increase to 0.06% in conditions with <inline-formula><mml:math id="m116"><mml:mi>n</mml:mi></mml:math></inline-formula> = 2000 and long sequences, regardless of group differences. Under DIF conditions, error rates remained low but showed slight sensitivity to sequence length and sample size, with balanced DIF conditions showing better control than unbalanced conditions. The highest error rate (0.31%) was observed in the unbalanced DIF conditions with <inline-formula><mml:math id="m117"><mml:mi>n</mml:mi></mml:math></inline-formula> = 1000 and short sequences, regardless of group differences. This rate remained well below the conventional 5% level.</p><?table t2?><?table t3?>
<p>As shown in <xref ref-type="table" rid="t3">Table 3</xref>, power exceeded 85% in most conditions except those with short sequences and <inline-formula><mml:math id="m118"><mml:mi>n</mml:mi></mml:math></inline-formula> = 1000. Statistical power was higher for balanced than unbalanced DIF conditions, and showed strong sensitivity to sequence length, exceeding 98% for medium and long sequences. Sample size and group differences also affected power, particularly with short sequences. For example, under unbalanced DIF with group differences, power increased from 49.6% (<inline-formula><mml:math id="m119"><mml:mi>n</mml:mi></mml:math></inline-formula> = 1000) to 87.6% (<inline-formula><mml:math id="m120"><mml:mi>n</mml:mi></mml:math></inline-formula> = 2000) with short sequences, suggesting that large samples can compensate for short sequence lengths (&lt; 10 transitions). These results highlight that while the method maintains excellent Type I error control even with moderate sample sizes (500 per group), sufficient statistical power for DIF detection requires either larger samples or longer process sequences, with the combination of small samples and short sequences presenting the most challenging scenario for reliable DIF detection.</p>
<p>Power varied across coefficients, influenced by both effect size and transition frequency. Coefficients with smaller absolute values (e.g., <inline-formula><mml:math id="m121"><mml:mrow><mml:msub><mml:mi>b</mml:mi><mml:mrow><mml:mi>C</mml:mi><mml:mi>A</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:math></inline-formula> = 0.4) showed lower detection rates than those with larger values. The transition frequency also impacted power, as illustrated by <inline-formula><mml:math id="m122"><mml:mrow><mml:msub><mml:mi>b</mml:mi><mml:mrow><mml:mi>E</mml:mi><mml:mi>F</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:math></inline-formula>: in balanced DIF conditions, its positive value (1) increased the tendency and average frequency of this transition for the focal group, while in unbalanced conditions, its negative value (-1) decreased this tendency, resulting in higher power for balanced conditions.</p></sec></sec></sec>
<sec><title>Empirical Study</title>
<p>An empirical analysis was conducted using process data from the Tickets task in PISA 2012 to demonstrate the practical application of the SRM-C.</p>
<sec><title>The Tickets Task</title>
<p>The <italic>Tickets</italic> task from PISA 2012 was selected for empirical analysis due to its structured interface design and extensive use in process data modeling literature (<xref ref-type="bibr" rid="r9">Chen, 2020</xref>; <xref ref-type="bibr" rid="r12">Fu et al., 2023</xref>; <xref ref-type="bibr" rid="r19">Han et al., 2022</xref>; <xref ref-type="bibr" rid="r61">Xiao &amp; Liu, 2024</xref>). In the focal item (CP038Q02), examinees operated a virtual ticketing machine to purchase a full fare country train ticket with two individual trips (<xref ref-type="bibr" rid="r42">OECD, 2014</xref>)<xref ref-type="fn" rid="fn1"><sup>1</sup></xref><fn id="fn1"><label>1</label>
<p>The original process data file in the empirical study reported here is available on the OECD website: <ext-link ext-link-type="uri" xlink:href="http://www.oecd.org/pisa/pisaproducts/database-cbapisa2012.htm">http://www.oecd.org/pisa/pisaproducts/database-cbapisa2012.htm</ext-link>.</p></fn>. The task’s well-defined structure allows for clear state decomposition. Following <xref ref-type="bibr" rid="r19">Han et al. (2022)</xref>, we decomposed the response process into 11 distinct states with 27 possible transitions between them (<xref ref-type="fig" rid="f5">Figure 5</xref>).</p><fig id="f5" position="anchor" fig-type="figure" orientation="portrait"><label>Figure 5</label><caption>
<title>State-Transition Diagram for PISA Tickets Task (CP038Q02)</title><p><italic>Note</italic>. See <xref ref-type="bibr" rid="r19">Han et al. (2022)</xref>. Ellipses denote start/end states; rectangles denote intermediate states. Solid arrows indicate correct transitions; dotted arrows indicate incorrect transitions. States represent distinct interface stages; transitions represent examinees’ actions between states.</p></caption><graphic xlink:href="meth.16999-f5" position="anchor" orientation="portrait"/></fig></sec>
<sec sec-type="methods"><title>Method</title>
<p>The analysis included response sequences from 1,672 Finnish (reference group) and 1,752 Australian (focal group) examinees (<italic>n</italic> = 3,424). Sequence lengths ranged from 5 to 43 actions (<italic>M</italic> = 6.79). Success rates differed notably between Finnish (57.5%) and Australian (70.3%) examinees, necessitating investigation of whether this disparity stems from ability differences or potential differential item functioning.</p>
<p>The SRM-C was fitted to the data using Bayesian estimation with the same MCMC specifications as the simulation study. The SRM-C implementation treated nationality as a binary covariate (0 = Finnish, 1 = Australian) to examine its effects on both ability distribution and response processes. All data and analysis code are available at <xref ref-type="bibr" rid="r17">Han and Ji (2025)</xref>.</p></sec>
<sec><title>Results of the Empirical Study</title>
<sec><title>Convergence and Model Fit</title>
<p>Model convergence was achieved with PSRF &lt; 1.2 for all parameters (<xref ref-type="bibr" rid="r14">Gelman &amp; Rubin, 1992</xref>). Model fit was evaluated using posterior predictive checks (PPC) based on 1,000 MCMC iterations. <xref ref-type="fig" rid="f6">Figure 6</xref> presents the comparison between observed state transition frequencies (points) and their posterior predictive distributions (boxplots). The observed values align well with model predictions, with all observations falling within their 95% prediction intervals. The posterior predictive p-value (<italic>ppp</italic> = 0.139) falls within the acceptable range [0.05, 0.95], indicating adequate model fit (<xref ref-type="bibr" rid="r13">Gelman et al., 2013</xref>).</p><fig id="f6" position="anchor" fig-type="figure" orientation="portrait"><label>Figure 6</label><caption>
<title>Posterior Predictive Check for State Transition Frequencies</title><p><italic>Note</italic>. Boxplots show posterior predictive distributions (median, IQR, and 2.5th-97.5th percentiles); points indicate observed frequencies.</p></caption><graphic xlink:href="meth.16999-f6" position="anchor" orientation="portrait"/></fig></sec>
<sec><title>Group-Specific Ability Parameters</title>
<p>The Australian group (focal group) demonstrated significantly higher mean ability (posterior <italic>Mean</italic> = 0.289, <italic>SD</italic> = 0.054, 95% HPD interval: [0.168, 0.375]) compared to the Finnish group (reference group), and their ability variance (0.609) was lower than the reference group’s standardized variance. These differences indicate distinct ability distributions between the two groups in this problem-solving task.</p></sec>
<sec><title>Country Effects on Response Processes</title>
<p>The posterior distributions of country coefficients for all state transitions showed small magnitudes (|<inline-formula><mml:math id="m123"><mml:mrow><mml:msub><mml:mi>b</mml:mi><mml:mrow><mml:mi>j</mml:mi><mml:mo>,</mml:mo><mml:mi>k</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:math></inline-formula>| &lt; 0.2) with 95% HPD intervals containing zero (<xref ref-type="table" rid="t4">Table 4</xref>). This absence of significant country effects on transition probabilities suggests measurement invariance across nations, indicating that performance differences between Finnish and Australian examinees are attributable to ability differences rather than DIF.</p>
<table-wrap id="t4" position="anchor" orientation="portrait">
<label>Table 4</label><caption><title>Posterior Distributions of Country Coefficients for State Transitions</title></caption>
	<table frame="hsides" rules="groups" style="striped-#f3f3f3; compact-1">
<col width="" align="left"/>
<col width=""/>
<col width=""/>
<col width=""/>
<col width=""/>
<thead>
<tr>
<th valign="bottom">Parameter</th>
<th valign="bottom">Mean</th>
<th valign="bottom">Standard Deviation</th>
<th valign="bottom">Lower Bound of the 95% HPD Interval</th>
<th valign="bottom">Upper Bound of the 95% HPD Interval</th>
</tr>
</thead>
<tbody>
<tr>
<td>	<inline-formula><mml:math id="m124"><mml:mrow><mml:msub><mml:mi>b</mml:mi><mml:mrow><mml:mstyle mathvariant="bold" mathsize="normal"><mml:mi>A</mml:mi><mml:mi>B</mml:mi></mml:mstyle></mml:mrow></mml:msub></mml:mrow></mml:math></inline-formula></td>
<td align="char" char="."><bold>-0.026 </bold></td>
<td align="char" char="."><bold>0.041 </bold></td>
<td align="char" char="."><bold>-0.123 </bold></td>
<td align="char" char="."><bold>0.012 </bold></td>
</tr>
<tr>
<td>	<inline-formula><mml:math id="m125"><mml:mrow><mml:msub><mml:mi>b</mml:mi><mml:mrow><mml:mtext>AG</mml:mtext></mml:mrow></mml:msub></mml:mrow></mml:math></inline-formula></td>
<td align="char" char=".">0.026 </td>
<td align="char" char=".">0.041 </td>
<td align="char" char=".">-0.012 </td>
<td align="char" char=".">0.123 </td>
</tr>
<tr>
<td>	<inline-formula><mml:math id="m126"><mml:mrow><mml:msub><mml:mi>b</mml:mi><mml:mrow><mml:mstyle mathvariant="bold" mathsize="normal"><mml:mi>B</mml:mi><mml:mi>C</mml:mi></mml:mstyle></mml:mrow></mml:msub></mml:mrow></mml:math></inline-formula></td>
<td align="char" char="."><bold>-0.051 </bold></td>
<td align="char" char="."><bold>0.075 </bold></td>
<td align="char" char="."><bold>-0.222 </bold></td>
<td align="char" char="."><bold>0.062 </bold></td>
</tr>
<tr>
<td>	<inline-formula><mml:math id="m127"><mml:mrow><mml:msub><mml:mi>b</mml:mi><mml:mrow><mml:mtext>BA</mml:mtext></mml:mrow></mml:msub></mml:mrow></mml:math></inline-formula></td>
<td align="char" char=".">0.034 </td>
<td align="char" char=".">0.078 </td>
<td align="char" char=".">-0.097 </td>
<td align="char" char=".">0.210 </td>
</tr>
<tr>
<td>	<inline-formula><mml:math id="m128"><mml:mrow><mml:msub><mml:mi>b</mml:mi><mml:mrow><mml:mtext>BH</mml:mtext></mml:mrow></mml:msub></mml:mrow></mml:math></inline-formula></td>
<td align="char" char=".">0.018 </td>
<td align="char" char=".">0.049 </td>
<td align="char" char=".">-0.067 </td>
<td align="char" char=".">0.135 </td>
</tr>
<tr>
<td>	<inline-formula><mml:math id="m129"><mml:mrow><mml:msub><mml:mi>b</mml:mi><mml:mrow><mml:mstyle mathvariant="bold" mathsize="normal"><mml:mi>C</mml:mi><mml:mi>D</mml:mi></mml:mstyle></mml:mrow></mml:msub></mml:mrow></mml:math></inline-formula></td>
<td align="char" char="."><bold>0.034 </bold></td>
<td align="char" char="."><bold>0.067 </bold></td>
<td align="char" char="."><bold>-0.082 </bold></td>
<td align="char" char="."><bold>0.168 </bold></td>
</tr>
<tr>
<td>	<inline-formula><mml:math id="m130"><mml:mrow><mml:msub><mml:mi>b</mml:mi><mml:mrow><mml:mtext>CA</mml:mtext></mml:mrow></mml:msub></mml:mrow></mml:math></inline-formula></td>
<td align="char" char=".">-0.021 </td>
<td align="char" char=".">0.074 </td>
<td align="char" char=".">-0.183 </td>
<td align="char" char=".">0.123 </td>
</tr>
<tr>
<td>	<inline-formula><mml:math id="m131"><mml:mrow><mml:msub><mml:mi>b</mml:mi><mml:mrow><mml:mtext>CI</mml:mtext></mml:mrow></mml:msub></mml:mrow></mml:math></inline-formula></td>
<td align="char" char=".">-0.012 </td>
<td align="char" char=".">0.048 </td>
<td align="char" char=".">-0.126 </td>
<td align="char" char=".">0.082 </td>
</tr>
<tr>
<td>	<inline-formula><mml:math id="m132"><mml:mrow><mml:msub><mml:mi>b</mml:mi><mml:mrow><mml:mstyle mathvariant="bold" mathsize="normal"><mml:mi>D</mml:mi><mml:mi>E</mml:mi></mml:mstyle></mml:mrow></mml:msub></mml:mrow></mml:math></inline-formula></td>
<td align="char" char="."><bold>0.197 </bold></td>
<td align="char" char="."><bold>0.150 </bold></td>
<td align="char" char="."><bold>-0.052 </bold></td>
<td align="char" char="."><bold>0.468 </bold></td>
</tr>
<tr>
<td>	<inline-formula><mml:math id="m133"><mml:mrow><mml:msub><mml:mi>b</mml:mi><mml:mrow><mml:mtext>DA</mml:mtext></mml:mrow></mml:msub></mml:mrow></mml:math></inline-formula></td>
<td align="char" char=".">-0.118 </td>
<td align="char" char=".">0.160 </td>
<td align="char" char=".">-0.467 </td>
<td align="char" char=".">0.125 </td>
</tr>
<tr>
<td>	<inline-formula><mml:math id="m134"><mml:mrow><mml:msub><mml:mi>b</mml:mi><mml:mrow><mml:mtext>DK</mml:mtext></mml:mrow></mml:msub></mml:mrow></mml:math></inline-formula></td>
<td align="char" char=".">-0.034 </td>
<td align="char" char=".">0.115 </td>
<td align="char" char=".">-0.351 </td>
<td align="char" char=".">0.146 </td>
</tr>
<tr>
<td>	<inline-formula><mml:math id="m135"><mml:mrow><mml:msub><mml:mi>b</mml:mi><mml:mrow><mml:mtext>DF</mml:mtext></mml:mrow></mml:msub></mml:mrow></mml:math></inline-formula></td>
<td align="char" char=".">-0.044 </td>
<td align="char" char=".">0.102 </td>
<td align="char" char=".">-0.304 </td>
<td align="char" char=".">0.110 </td>
</tr>
<tr>
<td>	<inline-formula><mml:math id="m136"><mml:mrow><mml:msub><mml:mi>b</mml:mi><mml:mrow><mml:mstyle mathvariant="bold" mathsize="normal"><mml:mi>E</mml:mi><mml:mi>K</mml:mi></mml:mstyle></mml:mrow></mml:msub></mml:mrow></mml:math></inline-formula></td>
<td align="char" char="."><bold>0.057 </bold></td>
<td align="char" char="."><bold>0.079 </bold></td>
<td align="char" char="."><bold>-0.055 </bold></td>
<td align="char" char="."><bold>0.237 </bold></td>
</tr>
<tr>
<td>	<inline-formula><mml:math id="m137"><mml:mrow><mml:msub><mml:mi>b</mml:mi><mml:mrow><mml:mtext>EA</mml:mtext></mml:mrow></mml:msub></mml:mrow></mml:math></inline-formula></td>
<td align="char" char=".">-0.007 </td>
<td align="char" char=".">0.076 </td>
<td align="char" char=".">-0.183 </td>
<td align="char" char=".">0.154 </td>
</tr>
<tr>
<td>	<inline-formula><mml:math id="m138"><mml:mrow><mml:msub><mml:mi>b</mml:mi><mml:mrow><mml:mtext>EF</mml:mtext></mml:mrow></mml:msub></mml:mrow></mml:math></inline-formula></td>
<td align="char" char=".">-0.050 </td>
<td align="char" char=".">0.086 </td>
<td align="char" char=".">-0.258 </td>
<td align="char" char=".">0.092 </td>
</tr>
<tr>
<td>	<inline-formula><mml:math id="m139"><mml:mrow><mml:msub><mml:mi>b</mml:mi><mml:mrow><mml:mstyle mathvariant="bold" mathsize="normal"><mml:mi>F</mml:mi><mml:mi>E</mml:mi></mml:mstyle></mml:mrow></mml:msub></mml:mrow></mml:math></inline-formula></td>
<td align="char" char="."><bold>0.018 </bold></td>
<td align="char" char="."><bold>0.084 </bold></td>
<td align="char" char="."><bold>-0.154 </bold></td>
<td align="char" char="."><bold>0.183 </bold></td>
</tr>
<tr>
<td>	<inline-formula><mml:math id="m140"><mml:mrow><mml:msub><mml:mi>b</mml:mi><mml:mrow><mml:mtext>FA</mml:mtext></mml:mrow></mml:msub></mml:mrow></mml:math></inline-formula></td>
<td align="char" char=".">-0.021 </td>
<td align="char" char=".">0.086 </td>
<td align="char" char=".">-0.204 </td>
<td align="char" char=".">0.138 </td>
</tr>
<tr>
<td>	<inline-formula><mml:math id="m141"><mml:mrow><mml:msub><mml:mi>b</mml:mi><mml:mrow><mml:mtext>FK</mml:mtext></mml:mrow></mml:msub></mml:mrow></mml:math></inline-formula></td>
<td align="char" char=".">0.003 </td>
<td align="char" char=".">0.080 </td>
<td align="char" char=".">-0.187 </td>
<td align="char" char=".">0.170 </td>
</tr>
<tr>
<td>	<inline-formula><mml:math id="m142"><mml:mrow><mml:msub><mml:mi>b</mml:mi><mml:mrow><mml:mstyle mathvariant="bold" mathsize="normal"><mml:mi>G</mml:mi><mml:mi>A</mml:mi></mml:mstyle></mml:mrow></mml:msub></mml:mrow></mml:math></inline-formula></td>
<td align="char" char="."><bold>0.011 </bold></td>
<td align="char" char="."><bold>0.036 </bold></td>
<td align="char" char="."><bold>-0.051 </bold></td>
<td align="char" char="."><bold>0.086 </bold></td>
</tr>
<tr>
<td>	<inline-formula><mml:math id="m143"><mml:mrow><mml:msub><mml:mi>b</mml:mi><mml:mrow><mml:mtext>GH</mml:mtext></mml:mrow></mml:msub></mml:mrow></mml:math></inline-formula></td>
<td align="char" char=".">-0.011 </td>
<td align="char" char=".">0.036 </td>
<td align="char" char=".">-0.086 </td>
<td align="char" char=".">0.051 </td>
</tr>
<tr>
<td>	<inline-formula><mml:math id="m144"><mml:mrow><mml:msub><mml:mi>b</mml:mi><mml:mrow><mml:mstyle mathvariant="bold" mathsize="normal"><mml:mi>H</mml:mi><mml:mi>A</mml:mi></mml:mstyle></mml:mrow></mml:msub></mml:mrow></mml:math></inline-formula></td>
<td align="char" char="."><bold>0.010 </bold></td>
<td align="char" char="."><bold>0.035 </bold></td>
<td align="char" char="."><bold>-0.046 </bold></td>
<td align="char" char="."><bold>0.103 </bold></td>
</tr>
<tr>
<td>	<inline-formula><mml:math id="m145"><mml:mrow><mml:msub><mml:mi>b</mml:mi><mml:mrow><mml:mtext>HI</mml:mtext></mml:mrow></mml:msub></mml:mrow></mml:math></inline-formula></td>
<td align="char" char=".">-0.010 </td>
<td align="char" char=".">0.035 </td>
<td align="char" char=".">-0.103 </td>
<td align="char" char=".">0.046 </td>
</tr>
<tr>
<td>	<inline-formula><mml:math id="m146"><mml:mrow><mml:msub><mml:mi>b</mml:mi><mml:mrow><mml:mstyle mathvariant="bold" mathsize="normal"><mml:mi>I</mml:mi><mml:mi>A</mml:mi></mml:mstyle></mml:mrow></mml:msub></mml:mrow></mml:math></inline-formula></td>
<td align="char" char="."><bold>0.013 </bold></td>
<td align="char" char="."><bold>0.043 </bold></td>
<td align="char" char="."><bold>-0.073 </bold></td>
<td align="char" char="."><bold>0.109 </bold></td>
</tr>
<tr>
<td>	<inline-formula><mml:math id="m147"><mml:mrow><mml:msub><mml:mi>b</mml:mi><mml:mrow><mml:mtext>IK</mml:mtext></mml:mrow></mml:msub></mml:mrow></mml:math></inline-formula></td>
<td align="char" char=".">-0.020 </td>
<td align="char" char=".">0.039 </td>
<td align="char" char=".">-0.110 </td>
<td align="char" char=".">0.046 </td>
</tr>
<tr>
<td>	<inline-formula><mml:math id="m148"><mml:mrow><mml:msub><mml:mi>b</mml:mi><mml:mrow><mml:mtext>IJ</mml:mtext></mml:mrow></mml:msub></mml:mrow></mml:math></inline-formula></td>
<td align="char" char=".">0.007 </td>
<td align="char" char=".">0.036 </td>
<td align="char" char=".">-0.059 </td>
<td align="char" char=".">0.098 </td>
</tr>
<tr>
<td>	<inline-formula><mml:math id="m149"><mml:mrow><mml:msub><mml:mi>b</mml:mi><mml:mrow><mml:mstyle mathvariant="bold" mathsize="normal"><mml:mi>J</mml:mi><mml:mi>A</mml:mi></mml:mstyle></mml:mrow></mml:msub></mml:mrow></mml:math></inline-formula></td>
<td align="char" char="."><bold>-0.007 </bold></td>
<td align="char" char="."><bold>0.042 </bold></td>
<td align="char" char="."><bold>-0.094 </bold></td>
<td align="char" char="."><bold>0.085 </bold></td>
</tr>
<tr>
<td>	<inline-formula><mml:math id="m150"><mml:mrow><mml:msub><mml:mi>b</mml:mi><mml:mrow><mml:mtext>JK</mml:mtext></mml:mrow></mml:msub></mml:mrow></mml:math></inline-formula></td>
<td align="char" char=".">0.007 </td>
<td align="char" char=".">0.042 </td>
<td align="char" char=".">-0.085 </td>
<td align="char" char=".">0.094 </td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<p><italic>Note</italic>. Correct transitions in bold.</p>
</table-wrap-foot>
</table-wrap></sec>
<sec><title>Response Patterns and Task Characteristics</title>
	<p>Analysis of ability estimates by response pattern and state transition parameters revealed meaningful patterns in both examinee behavior and task design effects (for detailed results, see Online Supplemental Materials C in <xref ref-type="bibr" rid="r18">Han et al., 2026</xref>). Ability estimates aligned with theoretical expectations: the optimal sequence (ABCDEK) yielded the highest estimates, while sequences with uncorrected errors showed the lowest estimates. State transition parameters revealed an interface-dependent correction pattern: examinees showed reluctance to correct errors at intermediate stages but demonstrated higher correction probabilities at the final purchase interface where ticket details were explicitly displayed, suggesting the impact of interface design on problem-solving behavior.</p></sec></sec></sec></sec>
<sec sec-type="discussion"><title>Summary and Discussion</title>
<p>Process data in CBAs holds significant potential to uncover and address inequities experienced by examinees, thereby ensuring fairness in the evaluation process. This study introduced the SRM-C to analyze such process data while accounting for group differences. The model demonstrates several key capabilities: accurate estimation of examinees’ abilities and task characteristic parameters, evaluation of covariate effects on ability distributions, and detection of DIF in response processes. Our simulation studies revealed that the SRM-C achieves robust parameter recovery and effective DIF detection under various conditions. Notably, adequate model performance can be achieved with either large samples (<inline-formula><mml:math id="m151"><mml:mi>n</mml:mi></mml:math></inline-formula> &gt; 2000) with short sequences or moderate samples with sequence lengths exceeding 10 transitions. The empirical analysis of PISA data demonstrated the model’s practical utility in distinguishing whether group performance differences stem from genuine ability variations or potential measurement bias. These findings suggest that the SRM-C can serve as a valuable tool for multiple educational applications: enabling researchers to understand group differences in ability distributions, helping test developers identify and minimize potential biases in assessments, assisting practitioners in analyzing problem-solving strategies across different populations, and supporting fair evaluation in high-stakes assessment contexts.</p>
<p>While the SRM-C demonstrates theoretical soundness and favorable performance under simulation conditions, several practical considerations warrant discussion regarding its real-world applicability. Model identification in the SRM-C parallels that of the multiple indicators, multiple causes (MIMIC) model (<xref ref-type="bibr" rid="r38">Muthén, 1985</xref>; <xref ref-type="bibr" rid="r40">Muthén et al., 1991</xref>; <xref ref-type="bibr" rid="r39">Muthén &amp; Lehman, 1985</xref>), where both measurement and structural components require careful consideration of identifiability constraints. Like the MIMIC model for DIF detection, the SRM-C faces an identification challenge when no anchor items (or state transitions) are pre-specified. Following recent developments in DIF detection methodology (<xref ref-type="bibr" rid="r10">Chen et al., 2023</xref>), we addressed this issue through a sparsity assumption — the premise that most state transitions are DIF-free. This assumption, which is reasonable for most educational contexts where well-designed assessments should exhibit measurement invariance across groups, was implemented within a Bayesian framework using a horseshoe prior with the global shrinkage parameter fixed at 0.2, following <xref ref-type="bibr" rid="r5">Betancourt’s (2021)</xref> one-horse-town approach. The method is most appropriate for applications where the majority of process elements are expected to function equivalently across groups, consistent with best practices in fair assessment design. However, the sparsity assumption may be violated when systematic measurement bias is pervasive across state transitions. 
In such cases, alternative identification strategies warrant future investigation, including (1) anchor-based approaches where domain experts could pre-specify invariant transitions based on substantive knowledge, and (2) alternative modeling frameworks that do not rely on sparsity assumptions, such as constrained multi-group extensions with different identification strategies. When the sparsity assumption is uncertain, we currently recommend conducting preliminary analyses to assess the plausibility of measurement invariance before model fitting, and when possible, triangulating findings with external validity evidence or alternative analytical approaches.</p>
<p>Within the adopted sparsity framework, we fixed <inline-formula><mml:math id="m152"><mml:mi>τ</mml:mi></mml:math></inline-formula> at 0.2, reflecting a relatively liberal approach that prioritizes sensitivity for detecting measurement bias over strict sparsity enforcement. The horseshoe prior’s local parameters <inline-formula><mml:math id="m153"><mml:mrow><mml:msub><mml:mi>λ</mml:mi><mml:mrow><mml:mi>j</mml:mi><mml:mo>,</mml:mo><mml:mi>k</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:math></inline-formula> are estimated from the data, providing adaptive regularization for individual coefficients. Both simulation and empirical studies demonstrated satisfactory performance under this specification. Sensitivity analyses across <inline-formula><mml:math id="m154"><mml:mrow><mml:mi>τ</mml:mi><mml:mo>∈</mml:mo><mml:mfenced close="}" open="{"><mml:mrow><mml:mn>0.05</mml:mn><mml:mo>,</mml:mo><mml:mn>0.1</mml:mn><mml:mo>,</mml:mo><mml:mn>0.2</mml:mn></mml:mrow></mml:mfenced></mml:mrow></mml:math></inline-formula> revealed that the model demonstrated robust performance across this range with stable parameter recovery and effective DIF detection. For practical applications, we recommend conducting sensitivity analyses when uncertain about the chosen value. Future research could explore full Bayesian inference for <inline-formula><mml:math id="m155"><mml:mi>τ</mml:mi></mml:math></inline-formula> through hierarchical modeling with appropriate hyperpriors, enabling data-driven estimation of the global shrinkage parameter.</p>
<p>Process data in CBAs may contain systematic bias where interface elements, cultural content, or technological familiarity systematically advantage or disadvantage certain groups across all ability levels, creating consistent rather than ability-dependent group differences. This study therefore focused on uniform DIF with a binary categorical covariate to address such bias patterns. Beyond its current capabilities, the SRM-C framework demonstrates strong extensibility potential. A particularly promising extension involves developing a two-parameter variant by allowing discrimination parameters (directional indicators <inline-formula><mml:math id="m156"><mml:mrow><mml:msubsup><mml:mi>I</mml:mi><mml:mrow><mml:mi>j</mml:mi><mml:mo>,</mml:mo><mml:mi>k</mml:mi></mml:mrow><mml:mo>+</mml:mo></mml:msubsup></mml:mrow></mml:math></inline-formula>) to be estimated rather than fixed at <inline-formula><mml:math id="m157"><mml:mrow><mml:mo>±</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:math></inline-formula>, and incorporating covariate-by-ability interaction terms to enable detection of non-uniform DIF. This extension leverages the model’s foundational structure while expanding its analytical capabilities to address more complex DIF patterns in process data fairness evaluation.</p>
<p>The SRM-C is primarily designed for large-scale assessment contexts where adequate sample sizes and process data richness can support robust parameter estimation. Our simulation results indicate that the method performs optimally with sample sizes exceeding 1000 per group, particularly when combined with sequences longer than 10 transitions. While the method showed acceptable performance with 500 participants per group under favorable conditions (longer sequences, simpler DIF patterns), we recommend caution when applying the SRM-C to small-scale studies. When large samples are not feasible, researchers should ensure longer response sequences (≥ 20 transitions) to compensate for reduced sample size, as sequence length and sample size can partially offset each other in supporting parameter identification and estimation precision.</p>
<p>The PISA empirical analysis primarily demonstrated the method’s behavior under ideal conditions — clear group differences with minimal DIF. Real-world applications may encounter more complex DIF patterns, smaller effect sizes, or violations of distributional assumptions that require additional consideration. For researchers considering the SRM-C, we recommend: (1) conducting preliminary analyses to assess the plausibility of the sparsity assumption within their specific context; (2) ensuring adequate sample sizes (≥ 1000 per group for optimal performance under all conditions, or ≥ 500 per group when sequences exceed 20 transitions, as longer sequences can partially compensate for smaller sample sizes); (3) performing comprehensive model diagnostics, including parameter convergence assessment, posterior predictive checks, and model comparison with the simplified SRM; (4) conducting sensitivity analyses with different <inline-formula><mml:math id="m158"><mml:mi>τ</mml:mi></mml:math></inline-formula> values to optimize performance within the sparsity framework, particularly when facing convergence issues or estimation instability; and (5) comparing results with traditional anchor-based methods when reliable anchor items can be reasonably specified.</p></sec>
</body>
<back>
<ref-list><title>References</title>
<ref id="r1"><mixed-citation publication-type="book">American Educational Research Association, American Psychological Association, &amp; National Council on Measurement in Education. (2014). <italic>The standards for educational and psychological testing</italic>. American Educational Research Association.</mixed-citation></ref>
<ref id="r2"><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Banfield</surname>, <given-names>J.</given-names></string-name>, &amp; <string-name name-style="western"><surname>Wilkerson</surname>, <given-names>B.</given-names></string-name></person-group> (<year>2014</year>). <article-title>Increasing student intrinsic motivation and self-efficacy through gamification pedagogy.</article-title> <source>Contemporary Issues in Education Research</source>, <volume>7</volume>(<issue>4</issue>), <fpage>291</fpage>–<lpage>298</lpage>. <pub-id pub-id-type="doi">10.19030/cier.v7i4.8843</pub-id></mixed-citation></ref>
<ref id="r3"><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Bauer</surname>, <given-names>D. J.</given-names></string-name>, <string-name name-style="western"><surname>Belzak</surname>, <given-names>W. C. M.</given-names></string-name>, &amp; <string-name name-style="western"><surname>Cole</surname>, <given-names>V. T.</given-names></string-name></person-group> (<year>2020</year>). <article-title>Simplifying the assessment of measurement invariance over multiple background variables: Using regularized moderated nonlinear factor analysis to detect differential item functioning.</article-title> <source>Structural Equation Modeling</source>, <volume>27</volume>(<issue>1</issue>), <fpage>43</fpage>–<lpage>55</lpage>. <pub-id pub-id-type="doi">10.1080/10705511.2019.1642754</pub-id><pub-id pub-id-type="pmid">33132679</pub-id></mixed-citation></ref>
<ref id="r4"><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Belzak</surname>, <given-names>W. C. M.</given-names></string-name>, &amp; <string-name name-style="western"><surname>Bauer</surname>, <given-names>D. J.</given-names></string-name></person-group> (<year>2020</year>). <article-title>Improving the assessment of measurement invariance: Using regularization to select anchor items and identify differential item functioning.</article-title> <source>Psychological Methods</source>, <volume>25</volume>(<issue>6</issue>), <fpage>673</fpage>–<lpage>690</lpage>. <pub-id pub-id-type="doi">10.1037/met0000253</pub-id><pub-id pub-id-type="pmid">31916799</pub-id></mixed-citation></ref>
<ref id="r5"><mixed-citation publication-type="web">Betancourt, M. (2021, May). <italic>Sparsity blues</italic>. GitHub. <ext-link ext-link-type="uri" xlink:href="https://betanalpha.github.io/assets/case_studies/modeling_sparsity.html#2223_The_horseshoe_Population_Model">https://betanalpha.github.io/assets/case_studies/modeling_sparsity.html#2223_The_horseshoe_Population_Model</ext-link></mixed-citation></ref>
<ref id="r6"><mixed-citation publication-type="confproc">Carvalho, C. M., Polson, N. G., &amp; Scott, J. G. (2009). Handling sparsity via the horseshoe. In D. van Dyk &amp; M. Welling (Eds.), <italic>Proceedings of the Twelfth International Conference on Artificial Intelligence and Statistics</italic> (pp. 73–80). PMLR.</mixed-citation></ref>
<ref id="r7"><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Carvalho</surname>, <given-names>C. M.</given-names></string-name>, <string-name name-style="western"><surname>Polson</surname>, <given-names>N. G.</given-names></string-name>, &amp; <string-name name-style="western"><surname>Scott</surname>, <given-names>J. G.</given-names></string-name></person-group> (<year>2010</year>). <article-title>The horseshoe estimator for sparse signals.</article-title> <source>Biometrika</source>, <volume>97</volume>(<issue>2</issue>), <fpage>465</fpage>–<lpage>480</lpage>. <pub-id pub-id-type="doi">10.1093/biomet/asq017</pub-id></mixed-citation></ref>
<ref id="r8"><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Casella</surname>, <given-names>G.</given-names></string-name>, <string-name name-style="western"><surname>Ghosh</surname>, <given-names>M.</given-names></string-name>, <string-name name-style="western"><surname>Gill</surname>, <given-names>J.</given-names></string-name>, &amp; <string-name name-style="western"><surname>Kyung</surname>, <given-names>M.</given-names></string-name></person-group> (<year>2010</year>). <article-title>Penalized regression, standard errors, and Bayesian Lassos.</article-title> <source>Bayesian Analysis</source>, <volume>5</volume>(<issue>2</issue>), <fpage>369</fpage>–<lpage>412</lpage>. <pub-id pub-id-type="doi">10.1214/10-BA607</pub-id></mixed-citation></ref>
<ref id="r9"><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Chen</surname>, <given-names>Y.</given-names></string-name></person-group> (<year>2020</year>). <article-title>A continuous-time dynamic choice measurement model for problem-solving process data.</article-title> <source>Psychometrika</source>, <volume>85</volume>(<issue>4</issue>), <fpage>1052</fpage>–<lpage>1075</lpage>. <pub-id pub-id-type="doi">10.1007/s11336-020-09734-1</pub-id><pub-id pub-id-type="pmid">33346883</pub-id></mixed-citation></ref>
<ref id="r10"><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Chen</surname>, <given-names>Y.</given-names></string-name>, <string-name name-style="western"><surname>Li</surname>, <given-names>C.</given-names></string-name>, <string-name name-style="western"><surname>Ouyang</surname>, <given-names>J.</given-names></string-name>, &amp; <string-name name-style="western"><surname>Xu</surname>, <given-names>G.</given-names></string-name></person-group> (<year>2023</year>). <article-title>DIF statistical inference without knowing anchoring items.</article-title> <source>Psychometrika</source>, <volume>88</volume>(<issue>4</issue>), <fpage>1097</fpage>–<lpage>1122</lpage>. <pub-id pub-id-type="doi">10.1007/s11336-023-09930-9</pub-id><pub-id pub-id-type="pmid">37550561</pub-id></mixed-citation></ref>
<ref id="r11"><mixed-citation publication-type="book">Ercikan, K., &amp; Pellegrino, J. W. (Eds.). (2017). <italic>Validation of score meaning for the next generation of assessments: The use of response processes</italic>. Routledge.</mixed-citation></ref>
<ref id="r12"><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Fu</surname>, <given-names>Y.</given-names></string-name>, <string-name name-style="western"><surname>Zhan</surname>, <given-names>P.</given-names></string-name>, <string-name name-style="western"><surname>Chen</surname>, <given-names>Q.</given-names></string-name>, &amp; <string-name name-style="western"><surname>Jiao</surname>, <given-names>H.</given-names></string-name></person-group> (<year>2023</year>). <article-title>Joint modeling of action sequences and action time in computer-based interactive tasks.</article-title> <source>Behavior Research Methods</source>, <volume>56</volume>, <fpage>4293</fpage>–<lpage>4310</lpage>. <pub-id pub-id-type="doi">10.3758/s13428-023-02178-2</pub-id><pub-id pub-id-type="pmid">37429984</pub-id></mixed-citation></ref>
<ref id="r13"><mixed-citation publication-type="book">Gelman, A., Carlin, J. B., Stern, H. S., Dunson, D. B., Vehtari, A., &amp; Rubin, D. B. (2013). <italic>Bayesian data analysis</italic> (3<sup>rd</sup> ed.). CRC Press.</mixed-citation></ref>
<ref id="r14"><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Gelman</surname>, <given-names>A.</given-names></string-name>, &amp; <string-name name-style="western"><surname>Rubin</surname>, <given-names>D. B.</given-names></string-name></person-group> (<year>1992</year>). <article-title>Inference from iterative simulation using multiple sequences.</article-title> <source>Statistical Science</source>, <volume>7</volume>(<issue>4</issue>), <fpage>457</fpage>–<lpage>472</lpage>. <pub-id pub-id-type="doi">10.1214/ss/1177011136</pub-id></mixed-citation></ref>
<ref id="r15"><mixed-citation publication-type="book">Goodman, M., Finnegan, R., Mohadjer, L., Krenzke, T., &amp; Hogan, J. (2013). <italic>Literacy, numeracy, and problem solving in technology-rich environments among US adults: Results from the program for the international assessment of adult competencies 2012: First look</italic> (No. NCES 2014-008). National Center for Education Statistics.</mixed-citation></ref>
<ref id="r16"><mixed-citation publication-type="book">Griffin, P., McGaw, B., &amp; Care, E. (Eds.). (2012). <italic>Assessment and teaching of 21<sup>st</sup> century skills</italic>. Springer.</mixed-citation></ref>
<ref id="r17"><mixed-citation publication-type="web">Han, Y., &amp; Ji, F. (2025). <italic>Bayesian sampler for the Sequential Response Model with Covariates (SRM-C)</italic> [OSF project page containing data and analysis/sampler code for study]. Open Science Framework. <ext-link ext-link-type="uri" xlink:href="https://osf.io/e3pqv/overview">https://osf.io/e3pqv/overview</ext-link></mixed-citation></ref>
	<ref id="r18"><mixed-citation publication-type="data">Han, Y., Ji, F., Chen, Y., Gan, K., &amp; Liu, H. (2026). <italic>Supplementary Materials to</italic> “Analyzing group differences and measurement fairness in process data: A sequential response model with covariates” [Supplemental procedures and analyses]. PsychOpen GOLD. <pub-id pub-id-type="doi">10.23668/psycharchives.21776</pub-id></mixed-citation></ref>
<ref id="r19"><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Han</surname>, <given-names>Y.</given-names></string-name>, <string-name name-style="western"><surname>Liu</surname>, <given-names>H.</given-names></string-name>, &amp; <string-name name-style="western"><surname>Ji</surname>, <given-names>F.</given-names></string-name></person-group> (<year>2022</year>). <article-title>A sequential response model for analyzing process data on technology-based problem-solving tasks.</article-title> <source>Multivariate Behavioral Research</source>, <volume>57</volume>(<issue>6</issue>), <fpage>960</fpage>–<lpage>977</lpage>. <pub-id pub-id-type="doi">10.1080/00273171.2021.1932403</pub-id><pub-id pub-id-type="pmid">34224276</pub-id></mixed-citation></ref>
<ref id="r20"><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Han</surname>, <given-names>Y.</given-names></string-name>, &amp; <string-name name-style="western"><surname>Wilson</surname>, <given-names>M.</given-names></string-name></person-group> (<year>2022</year>). <article-title>Analyzing student response processes to evaluate success on a technology-based problem-solving task.</article-title> <source>Applied Measurement in Education</source>, <volume>35</volume>(<issue>1</issue>), <fpage>33</fpage>–<lpage>45</lpage>. <pub-id pub-id-type="doi">10.1080/08957347.2022.2034821</pub-id></mixed-citation></ref>
<ref id="r21"><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Hastings</surname>, <given-names>W. K.</given-names></string-name></person-group> (<year>1970</year>). <article-title>Monte Carlo sampling methods using Markov chains and their applications.</article-title> <source>Biometrika</source>, <volume>57</volume>(<issue>1</issue>), <fpage>97</fpage>–<lpage>109</lpage>. <pub-id pub-id-type="doi">10.1093/biomet/57.1.97</pub-id></mixed-citation></ref>
<ref id="r22"><mixed-citation publication-type="book">Hesse, F., Care, E., Buder, J., Sassenberg, K., &amp; Griffin, P. (2015). A framework for teachable collaborative problem solving skills. In P. Griffin &amp; E. Care (Eds.), <italic>Assessment and teaching of 21<sup>st</sup> century skills: Methods and approach</italic> (pp. 37–56). Springer.</mixed-citation></ref>
<ref id="r23"><mixed-citation publication-type="book">Holland, P. W., &amp; Thayer, D. T. (1988). Differential item performance and the Mantel-Haenszel procedure. In H. Wainer &amp; H. I. Braun (Eds.), <italic>Test validity</italic> (pp. 129–145). Lawrence Erlbaum Associates.</mixed-citation></ref>
<ref id="r24"><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Huang</surname>, <given-names>P. H.</given-names></string-name></person-group> (<year>2018</year>). <article-title>A penalized likelihood method for multi-group structural equation modelling.</article-title> <source>British Journal of Mathematical &amp; Statistical Psychology</source>, <volume>71</volume>(<issue>3</issue>), <fpage>499</fpage>–<lpage>522</lpage>. <pub-id pub-id-type="doi">10.1111/bmsp.12130</pub-id><pub-id pub-id-type="pmid">29500879</pub-id></mixed-citation></ref>
<ref id="r25"><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Kass</surname>, <given-names>R. E.</given-names></string-name>, &amp; <string-name name-style="western"><surname>Raftery</surname>, <given-names>A. E.</given-names></string-name></person-group> (<year>1995</year>). <article-title>Bayes factors.</article-title> <source>Journal of the American Statistical Association</source>, <volume>90</volume>(<issue>430</issue>), <fpage>773</fpage>–<lpage>795</lpage>. <pub-id pub-id-type="doi">10.1080/01621459.1995.10476572</pub-id></mixed-citation></ref>
<ref id="r26"><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>LaMar</surname>, <given-names>M. M.</given-names></string-name></person-group> (<year>2018</year>). <article-title>Markov decision process measurement model.</article-title> <source>Psychometrika</source>, <volume>83</volume>(<issue>1</issue>), <fpage>67</fpage>–<lpage>88</lpage>. <pub-id pub-id-type="doi">10.1007/s11336-017-9570-0</pub-id><pub-id pub-id-type="pmid">28447309</pub-id></mixed-citation></ref>
<ref id="r27"><mixed-citation publication-type="book">Levy, R., &amp; Mislevy, R. J. (2016). <italic>Bayesian psychometric modeling</italic>. CRC Press.</mixed-citation></ref>
<ref id="r28"><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Li</surname>, <given-names>J.</given-names></string-name>, <string-name name-style="western"><surname>Zhang</surname>, <given-names>B.</given-names></string-name>, <string-name name-style="western"><surname>Du</surname>, <given-names>H.</given-names></string-name>, <string-name name-style="western"><surname>Zhu</surname>, <given-names>Z.</given-names></string-name>, &amp; <string-name name-style="western"><surname>Li</surname>, <given-names>Y. M.</given-names></string-name></person-group> (<year>2015</year>). <article-title>Metacognitive planning: Development and validation of an online measure.</article-title> <source>Psychological Assessment</source>, <volume>27</volume>(<issue>1</issue>), <fpage>260</fpage>–<lpage>271</lpage>. <pub-id pub-id-type="doi">10.1037/pas0000019</pub-id><pub-id pub-id-type="pmid">25222433</pub-id></mixed-citation></ref>
<ref id="r29"><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Liu</surname>, <given-names>H.</given-names></string-name>, <string-name name-style="western"><surname>Liu</surname>, <given-names>Y.</given-names></string-name>, &amp; <string-name name-style="western"><surname>Li</surname>, <given-names>M.</given-names></string-name></person-group> (<year>2018</year>). <article-title>Analysis of process data of PISA 2012 computer-based problem solving: Application of the modified multilevel mixture IRT model.</article-title> <source>Frontiers in Psychology</source>, <volume>9</volume>, <elocation-id>1372</elocation-id>. <pub-id pub-id-type="doi">10.3389/fpsyg.2018.01372</pub-id><pub-id pub-id-type="pmid">30123171</pub-id></mixed-citation></ref>
<ref id="r30"><mixed-citation publication-type="book">Lord, F. M. (1980). <italic>Applications of item response theory to practical testing problems</italic>. Lawrence Erlbaum Associates.</mixed-citation></ref>
<ref id="r31"><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Magis</surname>, <given-names>D.</given-names></string-name>, <string-name name-style="western"><surname>Béland</surname>, <given-names>S.</given-names></string-name>, <string-name name-style="western"><surname>Tuerlinckx</surname>, <given-names>F.</given-names></string-name>, &amp; <string-name name-style="western"><surname>De Boeck</surname>, <given-names>P.</given-names></string-name></person-group> (<year>2010</year>). <article-title>A general framework and an R package for the detection of dichotomous differential item functioning.</article-title> <source>Behavior Research Methods</source>, <volume>42</volume>(<issue>3</issue>), <fpage>847</fpage>–<lpage>862</lpage>. <pub-id pub-id-type="doi">10.3758/BRM.42.3.847</pub-id><pub-id pub-id-type="pmid">20805607</pub-id></mixed-citation></ref>
<ref id="r32"><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Magis</surname>, <given-names>D.</given-names></string-name>, <string-name name-style="western"><surname>Tuerlinckx</surname>, <given-names>F.</given-names></string-name>, &amp; <string-name name-style="western"><surname>De Boeck</surname>, <given-names>P.</given-names></string-name></person-group> (<year>2015</year>). <article-title>Detection of differential item functioning using the lasso approach.</article-title> <source>Journal of Educational and Behavioral Statistics</source>, <volume>40</volume>(<issue>2</issue>), <fpage>111</fpage>–<lpage>135</lpage>. <pub-id pub-id-type="doi">10.3102/1076998614559747</pub-id></mixed-citation></ref>
<ref id="r33"><mixed-citation publication-type="book">Mayer, R. E. (1992). <italic>Thinking, problem solving, cognition</italic> (2<sup>nd</sup> ed.). Freeman.</mixed-citation></ref>
<ref id="r34"><mixed-citation publication-type="book">Mayer, R. E., &amp; Wittrock, M. C. (2006). Problem solving. In P. A. Alexander &amp; P. H. Winne (Eds.), <italic>Handbook of educational psychology</italic> (2<sup>nd</sup> ed., pp. 287–303). Lawrence Erlbaum Associates.</mixed-citation></ref>
<ref id="r35"><mixed-citation publication-type="book">McFadden, D. (1974). Conditional logit analysis of qualitative choice behavior. In P. Zarembka (Ed.), <italic>Frontiers in Econometrics</italic> (pp. 105–142). Academic Press.</mixed-citation></ref>
<ref id="r36"><mixed-citation publication-type="web">Mislevy, R. J., Oranje, A., Bauer, M. I., von Davier, A. A., Hao, J., Corrigan, S., Hoffman, E., DiCerbo, K., &amp; John, M. (2014). <italic>Psychometric considerations in game-based assessment</italic>. GlassLabGames.</mixed-citation></ref>
<ref id="r37"><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Mitchell</surname>, <given-names>T. J.</given-names></string-name>, &amp; <string-name name-style="western"><surname>Beauchamp</surname>, <given-names>J. J.</given-names></string-name></person-group> (<year>1988</year>). <article-title>Bayesian variable selection in linear regression.</article-title> <source>Journal of the American Statistical Association</source>, <volume>83</volume>(<issue>404</issue>), <fpage>1023</fpage>–<lpage>1032</lpage>. <pub-id pub-id-type="doi">10.1080/01621459.1988.10478694</pub-id></mixed-citation></ref>
<ref id="r38"><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Muthén</surname>, <given-names>B.</given-names></string-name></person-group> (<year>1985</year>). <article-title>A method for studying the homogeneity of test items with respect to other relevant variables.</article-title> <source>Journal of Educational Statistics</source>, <volume>10</volume>(<issue>2</issue>), <fpage>121</fpage>–<lpage>132</lpage>. <pub-id pub-id-type="doi">10.3102/10769986010002121</pub-id></mixed-citation></ref>
<ref id="r39"><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Muthén</surname>, <given-names>B.</given-names></string-name>, &amp; <string-name name-style="western"><surname>Lehman</surname>, <given-names>J.</given-names></string-name></person-group> (<year>1985</year>). <article-title>Multiple group IRT modeling: Applications to item bias analysis.</article-title> <source>Journal of Educational Statistics</source>, <volume>10</volume>(<issue>2</issue>), <fpage>133</fpage>–<lpage>142</lpage>. <pub-id pub-id-type="doi">10.3102/10769986010002133</pub-id></mixed-citation></ref>
<ref id="r40"><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Muthén</surname>, <given-names>B. O.</given-names></string-name>, <string-name name-style="western"><surname>Kao</surname>, <given-names>C.-F.</given-names></string-name>, &amp; <string-name name-style="western"><surname>Burstein</surname>, <given-names>L.</given-names></string-name></person-group> (<year>1991</year>). <article-title>Instructionally sensitive psychometrics: Application of a new IRT-based detection technique to mathematics achievement test items.</article-title> <source>Journal of Educational Measurement</source>, <volume>28</volume>(<issue>1</issue>), <fpage>1</fpage>–<lpage>22</lpage>. <pub-id pub-id-type="doi">10.1111/j.1745-3984.1991.tb00340.x</pub-id></mixed-citation></ref>
<ref id="r41"><mixed-citation publication-type="web">National Center for Education Statistics. (2014). <italic>NAEP TEL Wells sample item</italic>. <ext-link ext-link-type="uri" xlink:href="http://nces.ed.gov/nationsreportcard/tel/wells_item.aspx">http://nces.ed.gov/nationsreportcard/tel/wells_item.aspx</ext-link></mixed-citation></ref>
<ref id="r42"><mixed-citation publication-type="book">OECD. (2014). <italic>PISA 2012 results: Creative problem solving: Students’ skills in tackling real-life problems</italic> (Vol. 5). OECD Publishing.</mixed-citation></ref>
<ref id="r43"><mixed-citation publication-type="book">OECD. (2016). <italic>Technical report of the survey of adult skills (PIAAC)</italic> (2<sup>nd</sup> ed.). OECD Publishing.</mixed-citation></ref>
<ref id="r44"><mixed-citation publication-type="web">OECD. (2017). <italic>PISA 2015 technical report</italic>. OECD Publishing. <ext-link ext-link-type="uri" xlink:href="https://www.oecd.org/pisa/data/2015-technical-report/PISA2015_TechRep_Final.pdf">https://www.oecd.org/pisa/data/2015-technical-report/PISA2015_TechRep_Final.pdf</ext-link></mixed-citation></ref>
<ref id="r45"><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Park</surname>, <given-names>T.</given-names></string-name>, &amp; <string-name name-style="western"><surname>Casella</surname>, <given-names>G.</given-names></string-name></person-group> (<year>2008</year>). <article-title>The Bayesian Lasso.</article-title> <source>Journal of the American Statistical Association</source>, <volume>103</volume>(<issue>482</issue>), <fpage>681</fpage>–<lpage>686</lpage>. <pub-id pub-id-type="doi">10.1198/016214508000000337</pub-id></mixed-citation></ref>
<ref id="r46"><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Piironen</surname>, <given-names>J.</given-names></string-name>, &amp; <string-name name-style="western"><surname>Vehtari</surname>, <given-names>A.</given-names></string-name></person-group> (<year>2017</year>). <article-title>Sparsity information and regularization in the horseshoe and other shrinkage priors.</article-title> <source>Electronic Journal of Statistics</source>, <volume>11</volume>(<issue>2</issue>), <fpage>5018</fpage>–<lpage>5051</lpage>. <pub-id pub-id-type="doi">10.1214/17-EJS1337SI</pub-id></mixed-citation></ref>
<ref id="r47"><mixed-citation publication-type="book">Polson, N. G., &amp; Scott, J. G. (2011). Shrink globally, act locally: Sparse Bayesian regularization and prediction. In J. M. Bernardo, M. J. Bayarri, J. O. Berger, A. P. Dawid, D. Heckerman, A. F. M. Smith, &amp; M. West (Eds.), <italic>Bayesian statistics 9</italic> (pp. 501–538). Oxford University Press.</mixed-citation></ref>
<ref id="r48"><mixed-citation publication-type="book">R Core Team. (2018). <italic>R: A language and environment for statistical computing</italic>. R Foundation for Statistical Computing.</mixed-citation></ref>
<ref id="r49"><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Raju</surname>, <given-names>N. S.</given-names></string-name></person-group> (<year>1988</year>). <article-title>The area between two item characteristic curves.</article-title> <source>Psychometrika</source>, <volume>53</volume>(<issue>4</issue>), <fpage>495</fpage>–<lpage>502</lpage>. <pub-id pub-id-type="doi">10.1007/BF02294403</pub-id></mixed-citation></ref>
<ref id="r50"><mixed-citation publication-type="book">San Martín, E. (2016). Identification of item response theory models. In W. J. van der Linden (Ed.), <italic>Handbook of item response theory</italic> (Vol. 2, pp. 127–150). CRC Press.</mixed-citation></ref>
<ref id="r51"><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Schauberger</surname>, <given-names>G.</given-names></string-name>, &amp; <string-name name-style="western"><surname>Mair</surname>, <given-names>P.</given-names></string-name></person-group> (<year>2020</year>). <article-title>A regularization approach for the detection of differential item functioning in generalized partial credit models.</article-title> <source>Behavior Research Methods</source>, <volume>52</volume>(<issue>1</issue>), <fpage>279</fpage>–<lpage>294</lpage>. <pub-id pub-id-type="doi">10.3758/s13428-019-01224-2</pub-id><pub-id pub-id-type="pmid">30887369</pub-id></mixed-citation></ref>
<ref id="r52"><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Schleicher</surname>, <given-names>A.</given-names></string-name></person-group> (<year>2008</year>). <article-title>PIAAC: A new strategy for assessing adult competencies.</article-title> <source>International Review of Education</source>, <volume>54</volume>(<issue>5–6</issue>), <fpage>627</fpage>–<lpage>650</lpage>. <pub-id pub-id-type="doi">10.1007/s11159-008-9105-0</pub-id></mixed-citation></ref>
<ref id="r53"><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Shu</surname>, <given-names>Z.</given-names></string-name>, <string-name name-style="western"><surname>Bergner</surname>, <given-names>Y.</given-names></string-name>, <string-name name-style="western"><surname>Zhu</surname>, <given-names>M.</given-names></string-name>, <string-name name-style="western"><surname>Hao</surname>, <given-names>J.</given-names></string-name>, &amp; <string-name name-style="western"><surname>von Davier</surname>, <given-names>A. A.</given-names></string-name></person-group> (<year>2017</year>). <article-title>An item response theory analysis of problem-solving processes in scenario-based tasks.</article-title> <source>Psychological Test and Assessment Modeling</source>, <volume>59</volume>(<issue>1</issue>), <fpage>109</fpage>–<lpage>131</lpage>.</mixed-citation></ref>
<ref id="r54"><mixed-citation publication-type="book">Shute, V. J., &amp; Moore, G. R. (2017). Consistency and validity in game-based stealth assessment. In H. Jiao &amp; R. W. Lissitz (Eds.), <italic>Technology enhanced innovative assessment: Development, modeling, and scoring from an interdisciplinary perspective</italic> (pp. 31–51). Information Age Publishing.</mixed-citation></ref>
<ref id="r55"><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Siddiq</surname>, <given-names>F.</given-names></string-name>, <string-name name-style="western"><surname>Gochyyev</surname>, <given-names>P.</given-names></string-name>, &amp; <string-name name-style="western"><surname>Wilson</surname>, <given-names>M.</given-names></string-name></person-group> (<year>2017</year>). <article-title>Learning in digital networks – ICT literacy: A novel assessment of students’ 21<sup>st</sup> century skills.</article-title> <source>Computers &amp; Education</source>, <volume>109</volume>, <fpage>11</fpage>–<lpage>37</lpage>. <pub-id pub-id-type="doi">10.1016/j.compedu.2017.01.014</pub-id></mixed-citation></ref>
<ref id="r56"><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Spiegelhalter</surname>, <given-names>D. J.</given-names></string-name>, <string-name name-style="western"><surname>Best</surname>, <given-names>N. G.</given-names></string-name>, <string-name name-style="western"><surname>Carlin</surname>, <given-names>B. P.</given-names></string-name>, &amp; <string-name name-style="western"><surname>van der Linde</surname>, <given-names>A.</given-names></string-name></person-group> (<year>2002</year>). <article-title>Bayesian measures of model complexity and fit.</article-title> <source>Journal of the Royal Statistical Society, Series B: Statistical Methodology</source>, <volume>64</volume>(<issue>4</issue>), <fpage>583</fpage>–<lpage>639</lpage>. <pub-id pub-id-type="doi">10.1111/1467-9868.00353</pub-id></mixed-citation></ref>
<ref id="r57"><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Swaminathan</surname>, <given-names>H.</given-names></string-name>, &amp; <string-name name-style="western"><surname>Rogers</surname>, <given-names>H. J.</given-names></string-name></person-group> (<year>1990</year>). <article-title>Detecting differential item functioning using logistic regression procedures.</article-title> <source>Journal of Educational Measurement</source>, <volume>27</volume>(<issue>4</issue>), <fpage>361</fpage>–<lpage>370</lpage>. <pub-id pub-id-type="doi">10.1111/j.1745-3984.1990.tb00754.x</pub-id></mixed-citation></ref>
<ref id="r58"><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Thissen</surname>, <given-names>D.</given-names></string-name>, &amp; <string-name name-style="western"><surname>Steinberg</surname>, <given-names>L.</given-names></string-name></person-group> (<year>1986</year>). <article-title>A taxonomy of item response models.</article-title> <source>Psychometrika</source>, <volume>51</volume>(<issue>4</issue>), <fpage>567</fpage>–<lpage>577</lpage>. <pub-id pub-id-type="doi">10.1007/BF02295596</pub-id></mixed-citation></ref>
<ref id="r59"><mixed-citation publication-type="book">Thissen, D., Steinberg, L., &amp; Wainer, H. (1993). Detection of differential item functioning using the parameters of item response models. In P. W. Holland &amp; H. Wainer (Eds.), <italic>Differential item functioning</italic> (pp. 67–113). Lawrence Erlbaum Associates.</mixed-citation></ref>
<ref id="r60"><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Tutz</surname>, <given-names>G.</given-names></string-name>, &amp; <string-name name-style="western"><surname>Schauberger</surname>, <given-names>G.</given-names></string-name></person-group> (<year>2015</year>). <article-title>A penalty approach to differential item functioning in Rasch models.</article-title> <source>Psychometrika</source>, <volume>80</volume>(<issue>1</issue>), <fpage>21</fpage>–<lpage>43</lpage>. <pub-id pub-id-type="doi">10.1007/s11336-013-9377-6</pub-id><pub-id pub-id-type="pmid">24297435</pub-id></mixed-citation></ref>
<ref id="r61"><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Xiao</surname>, <given-names>Y.</given-names></string-name>, &amp; <string-name name-style="western"><surname>Liu</surname>, <given-names>H.</given-names></string-name></person-group> (<year>2024</year>). <article-title>A state response measurement model for problem-solving process data.</article-title> <source>Behavior Research Methods</source>, <volume>56</volume>(<issue>1</issue>), <fpage>258</fpage>–<lpage>277</lpage>. <pub-id pub-id-type="doi">10.3758/s13428-022-02042-9</pub-id><pub-id pub-id-type="pmid">36597007</pub-id></mixed-citation></ref>
<ref id="r62"><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Xiao</surname>, <given-names>Y.</given-names></string-name>, <string-name name-style="western"><surname>Veldkamp</surname>, <given-names>B.</given-names></string-name>, &amp; <string-name name-style="western"><surname>Liu</surname>, <given-names>H.</given-names></string-name></person-group> (<year>2022</year>). <article-title>Combining process information and item response modeling to estimate problem-solving ability.</article-title> <source>Educational Measurement: Issues and Practice</source>, <volume>41</volume>(<issue>2</issue>), <fpage>36</fpage>–<lpage>54</lpage>. <pub-id pub-id-type="doi">10.1111/emip.12474</pub-id></mixed-citation></ref>
<ref id="r63"><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Yuan</surname>, <given-names>K.-H.</given-names></string-name>, <string-name name-style="western"><surname>Liu</surname>, <given-names>H.</given-names></string-name>, &amp; <string-name name-style="western"><surname>Han</surname>, <given-names>Y.</given-names></string-name></person-group> (<year>2021</year>). <article-title>Differential item functioning analysis without a priori information on anchor items: QQ plots and graphical test.</article-title> <source>Psychometrika</source>, <volume>86</volume>(<issue>2</issue>), <fpage>345</fpage>–<lpage>377</lpage>. <pub-id pub-id-type="doi">10.1007/s11336-021-09746-5</pub-id><pub-id pub-id-type="pmid">33656627</pub-id></mixed-citation></ref>
<ref id="r64"><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Zhan</surname>, <given-names>P.</given-names></string-name>, &amp; <string-name name-style="western"><surname>Qiao</surname>, <given-names>X.</given-names></string-name></person-group> (<year>2022</year>). <article-title>Diagnostic classification analysis of problem-solving competence using process data: An item expansion method.</article-title> <source>Psychometrika</source>, <volume>87</volume>(<issue>4</issue>), <fpage>1529</fpage>–<lpage>1547</lpage>. <pub-id pub-id-type="doi">10.1007/s11336-022-09855-9</pub-id><pub-id pub-id-type="pmid">35389193</pub-id></mixed-citation></ref>
</ref-list><fn-group><fn fn-type="financial-disclosure">
<p content-type="fn-title">This research project is supported by the Fundamental Research Funds for the Central Universities of Beijing Language and Culture University (25ZX01).</p></fn><fn fn-type="conflict">
<p content-type="fn-title">The authors have no relevant financial or non-financial interests to disclose.</p></fn></fn-group>
	<sec sec-type="data-availability" id="das"><title>Data Availability</title>
		<p>The data and code that support the findings of this study are available in the OSF repository at <xref ref-type="supplementary-material" rid="r17">Han and Ji (2025)</xref>. Supplemental procedures and analyses for this study are available at <xref ref-type="supplementary-material" rid="r18">Han et al. (2025)</xref>.</p>
	</sec>	

	
	
	
	<sec sec-type="supplementary-material" id="sp1"><title>Supplementary Materials</title>
		<table-wrap position="anchor">
			<table frame="void" style="background-#f3f3f3">
				<col width="60%" align="left"/>
				<col width="40%" align="left"/>
				<thead>
					<tr>
						<th>Type of supplementary materials</th>
						<th>Availability/Access</th>
					</tr>
				</thead>
				<tbody>
					<tr>
						<th colspan="2">Data</th>						
					</tr>
					<tr>
						<td>Empirical study - data.</td>
						<td><xref ref-type="supplementary-material" rid="r17">Han and Ji (2025)</xref></td>
					</tr>
					<tr>
						<td>Simulation study - data.</td>
						<td><xref ref-type="supplementary-material" rid="r17">Han and Ji (2025)</xref></td>
					</tr>					
					<tr style="grey-border-top-dashed">
						<th colspan="2">Code</th>
					</tr>
					<tr>
						<td>Empirical study - R code.</td>
						<td><xref ref-type="supplementary-material" rid="r17">Han and Ji (2025)</xref></td>
					</tr>
					<tr>
						<td>Simulation study - R code.</td>
						<td><xref ref-type="supplementary-material" rid="r17">Han and Ji (2025)</xref></td>
					</tr>
					<tr style="grey-border-top-dashed">
						<th colspan="2">Material</th>
					</tr>
					<tr>
						<td>Supplemental procedures and analyses.</td>
						<td><xref ref-type="supplementary-material" rid="r18">Han et al. (2025)</xref></td>
					</tr>
					<tr style="grey-border-top-dashed">
						<th colspan="2">Study/Analysis preregistration</th>
					</tr>	
					<tr>
						<td>The study was not preregistered.</td>
						<td>—</td>
					</tr>
					<tr style="grey-border-top-dashed">
						<th colspan="2">Other</th>
					</tr>	
					<tr>
						<td>No other materials available.</td>
						<td>—</td>
					</tr>
				</tbody>
			</table>
		</table-wrap>		
	</sec>
			

<ack>
<p>The authors have no additional (i.e., non-financial) support to report.</p>
</ack>
</back>
</article>
