<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article
  PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD with MathML3 v1.2 20190208//EN" "JATS-journalpublishing1-mathml3.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:ali="http://www.niso.org/schemas/ali/1.0/" article-type="research-article" dtd-version="1.2" xml:lang="en">
<front>
<journal-meta><journal-id journal-id-type="publisher-id">METH</journal-id><journal-id journal-id-type="nlm-ta">Methodology</journal-id>
<journal-title-group>
<journal-title>Methodology</journal-title><abbrev-journal-title abbrev-type="pubmed">Methodology</abbrev-journal-title>
</journal-title-group>
<issn pub-type="ppub">1614-1881</issn>
<issn pub-type="epub">1614-2241</issn>
<publisher><publisher-name>PsychOpen</publisher-name></publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="publisher-id">meth.18925</article-id>
<article-id pub-id-type="doi">10.5964/meth.18925</article-id>
<article-categories>
<subj-group subj-group-type="heading"><subject>Original Article</subject></subj-group>

<subj-group subj-group-type="badge">
<subject>Data</subject>
<subject>Code</subject>
<subject>Materials</subject>
</subj-group>

</article-categories>
<title-group>
<article-title>A Comparison of Optimization Algorithms for Forced-Choice Questionnaire Assembly</article-title>
<alt-title alt-title-type="right-running">Comparison of Optimization Algorithms for Forced-Choice</alt-title>
<alt-title specific-use="APA-reference-style" xml:lang="en">A comparison of optimization algorithms for forced-choice questionnaire assembly</alt-title>
</title-group>
<contrib-group>
<contrib contrib-type="author"><contrib-id contrib-id-type="orcid" authenticated="false">https://orcid.org/0009-0008-8836-2933</contrib-id><name name-style="western"><surname>Escudero</surname><given-names>Scarlett</given-names></name><xref ref-type="aff" rid="aff1"><sup>1</sup></xref><xref ref-type="aff" rid="aff2"><sup>2</sup></xref></contrib>
<contrib contrib-type="author" corresp="yes"><contrib-id contrib-id-type="orcid" authenticated="false">https://orcid.org/0000-0002-5234-5217</contrib-id><name name-style="western"><surname>Sorrel</surname><given-names>Miguel A.</given-names></name><xref ref-type="corresp" rid="cor1">*</xref><xref ref-type="aff" rid="aff2"><sup>2</sup></xref></contrib>
<contrib contrib-type="author"><contrib-id contrib-id-type="orcid" authenticated="false">https://orcid.org/0000-0001-5199-9828</contrib-id><name name-style="western"><surname>Kreitchmann</surname><given-names>Rodrigo S.</given-names></name><xref ref-type="aff" rid="aff3"><sup>3</sup></xref></contrib>
<contrib contrib-type="author"><contrib-id contrib-id-type="orcid" authenticated="false">https://orcid.org/0000-0001-6728-2709</contrib-id><name name-style="western"><surname>Abad</surname><given-names>Francisco J.</given-names></name><xref ref-type="aff" rid="aff2"><sup>2</sup></xref></contrib>
<contrib contrib-type="editor">
<name>
	<surname>Nájera Álvarez</surname>
	<given-names>Pablo</given-names>
</name>
<xref ref-type="aff" rid="aff4"/>
</contrib>
<aff id="aff1"><label>1</label><institution content-type="dept">Department of Educational Psychology</institution>, <institution>University of Minnesota</institution>, <addr-line><city>Minneapolis</city></addr-line>, MN, <country country="US">USA</country></aff>
<aff id="aff2"><label>2</label><institution content-type="dept">Department of Social Psychology and Methodology, Faculty of Psychology</institution>, <institution>Universidad Autónoma de Madrid</institution>, <addr-line><city>Madrid</city></addr-line>, <country country="ES">Spain</country></aff>
<aff id="aff3"><label>3</label><institution content-type="dept">Department of Methodology of Behavioral Sciences, Faculty of Psychology</institution>, <institution>Universidad Nacional de Educación a Distancia</institution>, <addr-line><city>Madrid</city></addr-line>, <country country="ES">Spain</country></aff>
	<aff id="aff4">Universidad Pontificia Comillas, Madrid, <country country="ES">Spain</country></aff>
</contrib-group>
<author-notes>
<corresp id="cor1"><label>*</label>Faculty of Psychology, Universidad Autónoma de Madrid, 6 Iván Pavlov St, Cantoblanco Campus, Madrid, Spain, 28049. <email xlink:href="mailto:miguel.sorrel@uam.es">miguel.sorrel@uam.es</email></corresp>
</author-notes>
<pub-date date-type="pub" publication-format="electronic"><day>30</day><month>06</month><year>2026</year></pub-date>
<pub-date pub-type="collection" publication-format="electronic"><year>2026</year></pub-date>
<volume>22</volume>
<issue>2</issue>

<fpage>172</fpage>
<lpage>194</lpage>
<history>
<date date-type="received">
<day>18</day>
<month>07</month>
<year>2025</year>
</date>
<date date-type="accepted">
<day>06</day>
<month>03</month>
<year>2026</year>
</date>
</history>
<permissions><copyright-year>2026</copyright-year><copyright-holder>Escudero, Sorrel, Kreitchmann, &amp; Abad</copyright-holder><license license-type="open-access" specific-use="CC BY 4.0" xlink:href="https://creativecommons.org/licenses/by/4.0/"><ali:license_ref>https://creativecommons.org/licenses/by/4.0/</ali:license_ref><license-p>This is an open access article distributed under the terms of the Creative Commons Attribution 4.0 International License, CC BY 4.0, which permits unrestricted use, distribution, and reproduction in any medium, provided the original work is properly cited.</license-p></license></permissions>
<abstract>
	<p>Forced-choice questionnaires (FCQs) are increasingly favored over traditional Likert-type formats due to their reduced susceptibility to faking and social desirability (SD). Their construction typically involves pairing items from existing single-stimulus banks. This study compares four methods for assembling FCQs: a genetic algorithm (GA), two simulated annealing (SA) strategies (blueprint-based and scale-parameter-optimized), and brute-force (BF) random search. These methods are evaluated via simulation and an empirical example, focusing on trait score recovery. The effects of questionnaire length and SD matching on recovery are also examined. Three item banks varying in the <italic>a<sub>j</sub>-SD<sub>j</sub></italic> relationship and inclusion of heteropolar blocks were used to assess performance across pairing scenarios. GA consistently produced the most reliable scores, followed by SA with <italic>a<sub>j</sub></italic> optimization. All examined factors significantly affected reliability. GA is recommended for FCQ assembly, especially with short questionnaires, no heteropolar blocks, and high <italic>a<sub>j</sub>-SD<sub>j</sub></italic> correlation.</p>
</abstract>
<kwd-group kwd-group-type="author"><kwd>forced-choice questionnaires</kwd><kwd>reliability</kwd><kwd>optimal assembly</kwd><kwd>genetic algorithm</kwd><kwd>simulated annealing algorithm</kwd><kwd>brute-force</kwd></kwd-group>

</article-meta>
</front>
<body>
	<sec sec-type="intro" id="intro"><title/>		
<p>Non-cognitive constructs have long been of interest in psychological research and have traditionally been measured using single-stimulus formats (SS), such as the Likert format. However, this format has been known to suffer from response biases such as acquiescence and faking, which can undermine reliability, validity, and variability of the observed scores (<xref ref-type="bibr" rid="r32">Salgado, 2016</xref>), and alter the item covariance structure (<xref ref-type="bibr" rid="r27">McCrae et al., 2001</xref>). A solution that has shown promise in solving some of these problems is the forced-choice (FC) format. This format increases criterion-related validity (<xref ref-type="bibr" rid="r33">Salgado &amp; Táuriz, 2014</xref>) and reduces both faking (<xref ref-type="bibr" rid="r5">Cao &amp; Drasgow, 2019</xref>) and other response biases (<xref ref-type="bibr" rid="r21">Kreitchmann et al., 2019</xref>). However, recent studies have emphasized that these advantages are only achievable if forced-choice questionnaires (FCQs) are carefully designed (<xref ref-type="bibr" rid="r13">Graña et al., 2025</xref>).</p>
<p>Given the numerous possible item combinations involved in block construction, several procedures have been developed to optimize the assembly process. However, to date, a comprehensive comparison of these methods is lacking. The present study aims to address this gap by evaluating the most effective method for assembling FCQs from a SS item bank. The comparison is limited to the two options available in the software at the time of writing, namely the genetic algorithm (GA; <xref ref-type="bibr" rid="r20">Kreitchmann et al., 2022</xref>) and the simulated annealing algorithm (SA; <xref ref-type="bibr" rid="r24">Li et al., 2022</xref>). While a linear programming approach is possible, it can be substantially more computationally demanding and should be explored in detail in future research. Other heuristics, such as ant colony optimization, have not yet been applied. Therefore, a systematic comparison of available methods is needed to guide researchers in assembling FCQs, especially since GA is implemented in a Shiny app<xref ref-type="fn" rid="fn1"><sup>1</sup></xref><fn id="fn1"><label>1</label>
<p><ext-link ext-link-type="uri" xlink:href="https://psychometricmodelling.shinyapps.io/FCoptimization/">https://psychometricmodelling.shinyapps.io/FCoptimization/</ext-link></p></fn> and SA is available in the <italic>autoFC</italic> R package.<xref ref-type="fn" rid="fn2"><sup>2</sup></xref><fn id="fn2"><label>2</label>
<p><ext-link ext-link-type="uri" xlink:href="https://cran.r-project.org/web/packages/autoFC/index.html">https://cran.r-project.org/web/packages/autoFC/index.html</ext-link></p></fn></p>

<sec><title>Forced-Choice Questionnaire Design and Modeling</title>
<p>In recent years, there has been a growing trend toward the development and use of FCQs to assess non-cognitive constructs (<xref ref-type="bibr" rid="r23">Lee et al., 2025</xref>). The FC format can be distinguished from SS format in that a choice must be made among the alternatives rather than rating each statement. A commonly used example is a FC block consisting of item pairs, in which two SS items are presented together and the respondent is asked to choose one over the other (see <xref ref-type="fig" rid="f1">Figure 1</xref>; for a comprehensive review, see <xref ref-type="bibr" rid="r15">Hontangas et al., 2015</xref>).</p><fig id="f1" position="float" fig-type="figure" orientation="portrait"><label>Figure 1</label><caption>
<title>Examples of Non-Cognitive Questionnaire Formats</title></caption><graphic xlink:href="meth.18925-f1" position="float" orientation="portrait"/></fig>
<p>Despite the advantages of FCQs over SS, this response format can introduce fully or partially ipsative scores. Ipsativity refers to the interdependence among trait scores, meaning that if a person scores higher on one trait, they must score lower on another. This can affect validity. For example, in a purely ipsative FCQ, the validity coefficients of all measured traits with respect to a given external criterion will sum (and average) to zero (<xref ref-type="bibr" rid="r14">Hicks, 1970</xref>). Full rank of the effective FCQ loading structure is a necessary condition for achieving non-ipsative scores, as noted in <xref ref-type="bibr" rid="r2">Brown (2016)</xref>. Within the Thurstonian item response theory (IRT) framework, this condition is typically expressed in terms of the factor loading matrix. For instance, the FCQ loading matrix becomes rank-deficient when the loadings within every block or within every dimension are equal. A straightforward way to avoid this issue is to combine items with factor loadings of opposite sign within each block. Under alternative IRT parameterizations, however, the same rank condition can be expressed in terms of item scale parameters rather than factor loadings. <xref ref-type="bibr" rid="r28">Morillo (2018</xref>, p. 74) further illustrates that under such parameterizations, the matrix can also become rank-deficient when the scale parameters of the two dimensions represented in a block maintain a constant ratio across all blocks measuring the same pair of dimensions. While these conditions are violated in classical test theory scoring using blocks of equally keyed items (with equal weights for all items), under IRT modeling, where scale parameters are allowed to vary, such violations occur only on rare occasions. 
Some available IRT models allow researchers to formally characterize the response processes underlying FC formats and obtain non-ipsative scores (e.g., <xref ref-type="bibr" rid="r3">Brown &amp; Maydeu-Olivares, 2011</xref>; <xref ref-type="bibr" rid="r30">Morillo et al., 2016</xref>; <xref ref-type="bibr" rid="r35">Stark et al., 2005</xref>).</p>
<p>Ipsativity is therefore a property of the scoring method rather than of the FC format itself. Hence, ipsativity can be addressed with models such as the Multi-Unidimensional Pairwise Preference Two-Parameter Logistic (MUPP-2PL; <xref ref-type="bibr" rid="r30">Morillo et al., 2016</xref>) or the Thurstonian IRT for FC data (TIRT; <xref ref-type="bibr" rid="r3">Brown &amp; Maydeu-Olivares, 2011</xref>). For binary FC comparisons, these models yield nearly equivalent response probabilities, although differences increase for block sizes greater than two. Test assembly assumes that item parameters are invariant when moving from SS to pairwise FC administrations. Empirical evidence supports approximate invariance: <xref ref-type="bibr" rid="r26">Lin and Brown (2017)</xref> found TIRT parameters largely stable across block compositions (quads vs. triplets), while <xref ref-type="bibr" rid="r29">Morillo et al. (2019)</xref> reported that MUPP-2PL parameters from FC blocks closely matched those from graded-scale formats. In this study, we adopt the MUPP-2PL framework, for which invariance between Likert-type and FC formats has been directly evaluated, showing correlations above .90 across formats.</p>
<sec><title>Multi-Unidimensional Pairwise Preference Two-Parameter Logistic Model</title>
	<p>The MUPP-2PL model can be used in dichotomous FC blocks, where the probability of agreement with response option is modeled following a 2PL function. The model includes an invariance assumption, which states that item parameters remain constant regardless of the response format (i.e., FC vs. Likert) and the within-block context (which item is paired with another). Therefore, <inline-formula><mml:math id="m6"><mml:mrow><mml:msub><mml:mi>a</mml:mi><mml:mrow><mml:msub><mml:mi>j</mml:mi><mml:mn>1</mml:mn></mml:msub></mml:mrow></mml:msub></mml:mrow></mml:math></inline-formula> and <inline-formula><mml:math id="m7"><mml:mrow><mml:msub><mml:mi>a</mml:mi><mml:mrow><mml:msub><mml:mi>j</mml:mi><mml:mn>2</mml:mn></mml:msub></mml:mrow></mml:msub></mml:mrow></mml:math></inline-formula> in <xref ref-type="disp-formula" rid="e">Equation (1)</xref> should be the same for 2PL items and in the FC block. The block characteristic function is:</p><disp-formula id="e"><label>1</label>	<mml:math id="m8"><mml:mrow><mml:mtable equalrows="true" equalcolumns="true"><mml:mtr><mml:mtd><mml:mrow><mml:mi>P</mml:mi><mml:mfenced><mml:mrow><mml:msub><mml:mi>Y</mml:mi><mml:mrow><mml:mi>i</mml:mi><mml:mi>j</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mn>1</mml:mn><mml:mo>|</mml:mo><mml:msub><mml:mi>θ</mml:mi><mml:mi>i</mml:mi></mml:msub></mml:mrow></mml:mfenced><mml:mo>=</mml:mo><mml:mfrac><mml:mn>1</mml:mn><mml:mrow><mml:mn>1</mml:mn><mml:mo>+</mml:mo><mml:msup><mml:mi>e</mml:mi><mml:mrow><mml:mo>−</mml:mo><mml:mfenced><mml:mrow><mml:msub><mml:mi>a</mml:mi><mml:mrow><mml:msub><mml:mi>j</mml:mi><mml:mn>1</mml:mn></mml:msub></mml:mrow></mml:msub><mml:msub><mml:mi>θ</mml:mi><mml:mrow><mml:mover accent="true"><mml:mrow><mml:msub><mml:mi>j</mml:mi><mml:mn>1</mml:mn></mml:msub></mml:mrow><mml:mo 
stretchy="true">˜</mml:mo></mml:mover><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>−</mml:mo><mml:msub><mml:mi>a</mml:mi><mml:mrow><mml:msub><mml:mi>j</mml:mi><mml:mn>2</mml:mn></mml:msub></mml:mrow></mml:msub><mml:msub><mml:mi>θ</mml:mi><mml:mrow><mml:mover accent="true"><mml:mrow><mml:msub><mml:mi>j</mml:mi><mml:mn>2</mml:mn></mml:msub></mml:mrow><mml:mo stretchy="true">˜</mml:mo></mml:mover><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>+</mml:mo><mml:msub><mml:mi>d</mml:mi><mml:mi>j</mml:mi></mml:msub></mml:mrow></mml:mfenced></mml:mrow></mml:msup></mml:mrow></mml:mfrac><mml:mo>,</mml:mo></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:mrow></mml:math></disp-formula><disp-formula id="e___1"><label>2</label>	<mml:math id="m9"><mml:mrow><mml:mtable equalrows="true" equalcolumns="true"><mml:mtr><mml:mtd><mml:mrow><mml:msub><mml:mi>d</mml:mi><mml:mi>j</mml:mi></mml:msub><mml:mo>=</mml:mo><mml:msub><mml:mi>a</mml:mi><mml:mrow><mml:msub><mml:mi>j</mml:mi><mml:mn>2</mml:mn></mml:msub></mml:mrow></mml:msub><mml:msub><mml:mi>b</mml:mi><mml:mrow><mml:msub><mml:mi>j</mml:mi><mml:mn>2</mml:mn></mml:msub></mml:mrow></mml:msub><mml:mo>−</mml:mo><mml:msub><mml:mi>a</mml:mi><mml:mrow><mml:msub><mml:mi>j</mml:mi><mml:mn>1</mml:mn></mml:msub></mml:mrow></mml:msub><mml:msub><mml:mi>b</mml:mi><mml:mrow><mml:msub><mml:mi>j</mml:mi><mml:mn>1</mml:mn></mml:msub></mml:mrow></mml:msub><mml:mo>,</mml:mo></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:mrow></mml:math></disp-formula>
<p>where <inline-formula><mml:math id="m10"><mml:mrow><mml:msub><mml:mi>Y</mml:mi><mml:mrow><mml:mi>i</mml:mi><mml:mi>j</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:math></inline-formula> indicates that the respondent selected item <inline-formula><mml:math id="m11"><mml:mrow><mml:msub><mml:mi>j</mml:mi><mml:mn>1</mml:mn></mml:msub></mml:mrow></mml:math></inline-formula> (the first item in the pair); <inline-formula><mml:math id="m12"><mml:mrow><mml:msub><mml:mi>θ</mml:mi><mml:mi>i</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula> represents a person’s position on each of the <inline-formula><mml:math id="m13"><mml:mi>D</mml:mi></mml:math></inline-formula> latent traits measured by the FCQ; <inline-formula><mml:math id="m14"><mml:mrow><mml:msub><mml:mi>θ</mml:mi><mml:mrow><mml:mover accent="true"><mml:mrow><mml:msub><mml:mi>j</mml:mi><mml:mn>1</mml:mn></mml:msub></mml:mrow><mml:mo stretchy="true">˜</mml:mo></mml:mover><mml:mi>i</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:math></inline-formula> and <inline-formula><mml:math id="m15"><mml:mrow><mml:msub><mml:mi>θ</mml:mi><mml:mrow><mml:mover accent="true"><mml:mrow><mml:msub><mml:mi>j</mml:mi><mml:mn>2</mml:mn></mml:msub></mml:mrow><mml:mo stretchy="true">˜</mml:mo></mml:mover><mml:mi>i</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:math></inline-formula> are the coordinates of <inline-formula><mml:math id="m16"><mml:mrow><mml:msub><mml:mi>θ</mml:mi><mml:mi>i</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula> for items <inline-formula><mml:math id="m17"><mml:mrow><mml:msub><mml:mi>j</mml:mi><mml:mn>1</mml:mn></mml:msub></mml:mrow></mml:math></inline-formula> and <inline-formula><mml:math id="m18"><mml:mrow><mml:msub><mml:mi>j</mml:mi><mml:mn>2</mml:mn></mml:msub></mml:mrow></mml:math></inline-formula>; <inline-formula><mml:math 
id="m19"><mml:mrow><mml:msub><mml:mi>a</mml:mi><mml:mrow><mml:msub><mml:mi>j</mml:mi><mml:mn>1</mml:mn></mml:msub></mml:mrow></mml:msub></mml:mrow></mml:math></inline-formula> and <inline-formula><mml:math id="m20"><mml:mrow><mml:msub><mml:mi>a</mml:mi><mml:mrow><mml:msub><mml:mi>j</mml:mi><mml:mn>2</mml:mn></mml:msub></mml:mrow></mml:msub></mml:mrow></mml:math></inline-formula> are the scale parameters; <inline-formula><mml:math id="m21"><mml:mrow><mml:msub><mml:mi>d</mml:mi><mml:mi>j</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula> is the block intercept parameter; and <inline-formula><mml:math id="m22"><mml:mrow><mml:msub><mml:mi>b</mml:mi><mml:mrow><mml:msub><mml:mi>j</mml:mi><mml:mn>1</mml:mn></mml:msub></mml:mrow></mml:msub></mml:mrow></mml:math></inline-formula> and <inline-formula><mml:math id="m23"><mml:mrow><mml:msub><mml:mi>b</mml:mi><mml:mrow><mml:msub><mml:mi>j</mml:mi><mml:mn>2</mml:mn></mml:msub></mml:mrow></mml:msub></mml:mrow></mml:math></inline-formula> are the location parameters. The sign of the scale (discrimination) parameter determines the item’s polarity. A positive scale parameter signifies a positively keyed item, where higher trait levels are associated with greater agreement. Conversely, a negative scale parameter indicates a negatively keyed item, where higher trait levels correspond to lower agreement. If both items have the same sign, either positive or negative, the block is considered homopolar or equally keyed; otherwise, it is called heteropolar or unequally keyed. In any case, it must be ensured that the resulting factor loading matrix is of full rank so that non-ipsative scores can be obtained.</p></sec></sec>
<sec><title>Forced-Choice Questionnaire Optimal Assembly</title>
<p>One common approach for designing a FCQ is to pair items from a SS item bank to form blocks. This process can yield thousands of potential FCQs with varying levels of reliability. Because reliability is essential for validity, assembling blocks without considering item properties may result in suboptimal tests. With the growing use of FCQs, the need for optimal assembly procedures has become increasingly important.</p>
<sec><title>Social Desirability</title>
<p>One important consideration in the design of FCQs is the control of social desirability (SD). If SD matching (SDM) is not carefully considered during block construction, differences in item SDs are likely to emerge. This is especially true for heteropolar pairs. Such differences may compromise the validity of the questionnaire scores by making it easier for respondents to select socially desirable options instead of those that reflect their true traits. Matching items by their level of SD is a widely recommended strategy for reducing faking in FCQs (<xref ref-type="bibr" rid="r5">Cao &amp; Drasgow, 2019</xref>; <xref ref-type="bibr" rid="r31">Pavlov et al., 2021</xref>).</p>
<p>One approach for matching items based on SD involves convening an expert committee to rate the desirability level of each item. The average desirability rating is then calculated for each item across all raters and items are matched by minimizing the absolute difference between items’ mean desirability values. Items are considered well-matched when the difference in SD falls below a predetermined cutoff. For homopolar blocks, a common cutoff is 0.50 on a 5-point scale, whereas for heteropolar blocks, the cutoff typically needs to be relaxed in order to ensure that a sufficient number of valid heteropolar pairs can be formed (<xref ref-type="bibr" rid="r13">Graña et al., 2025</xref>; <xref ref-type="bibr" rid="r25">Li et al., 2025</xref>).</p></sec>
<sec><title>Scale Parameters</title>
<p>The ability to obtain normative scoring from FCQ responses through IRT relies on the differential weighting of responses (<xref ref-type="bibr" rid="r4">Bürkner, 2022</xref>), which is driven by the scale parameters. Heteropolar blocks naturally lead to this differential weighting. However, this is a topic of debate in FCQ design since pairing them can be tricky and increase cognitive demand. On the one hand, studies (e.g., <xref ref-type="bibr" rid="r3">Brown &amp; Maydeu-Olivares, 2011</xref>; <xref ref-type="bibr" rid="r11">Frick et al., 2023</xref>) suggest that including heteropolar blocks can enhance estimation accuracy and validity. On the other hand, recent empirical findings (<xref ref-type="bibr" rid="r13">Graña et al., 2025</xref>) question whether heteropolar blocks are actually necessary. These authors suggest that if FCQs are assembled with homopolar blocks that differ in scale parameters, heteropolar blocks are not necessary. This is an ongoing debate, with many authors suggesting that including approximately 15–40% heteropolar blocks can enhance the psychometric properties of FCQs, even if it means accepting a minor compromise in SD (<xref ref-type="bibr" rid="r22">Lee et al., 2022</xref>; <xref ref-type="bibr" rid="r25">Li et al., 2025</xref>). Thus, achieving faking resistance requires careful SDM, especially when heteropolar blocks are included.</p></sec>
<sec><title>Algorithms for Optimal Assembly</title>
<sec><title>Simulated Annealing Algorithm</title>
	<p>The simulated annealing algorithm is a heuristic optimization method inspired by the physical annealing of solids. It operates in two steps (<xref ref-type="bibr" rid="r19">Kirkpatrick et al., 1983</xref>): first, the system’s temperature is raised to a maximum, then gradually decreased until a minimum is reached, minimizing the system’s energy, which corresponds to the cost of a solution. A key feature of SA is its ability to accept worse solutions to escape local optima. <xref ref-type="bibr" rid="r24">Li et al. (2022)</xref> implemented SA to assemble FCQs in the <italic>autoFC</italic> R package. The procedure begins with a user-defined blueprint specifying the number of blocks, block size, trait composition, and keying constraints. Each item is characterized by numerical attributes, such as SD or factor loadings, and the algorithm computes a weighted composite energy for each block, combining block-level indices for each attribute using user-specified weights so that higher absolute values reflect more desirable configurations. Heteropolar blocks are not included in the energy calculation; instead, the user defines specific trait combinations, and blocks meeting these conditions are randomly selected. Starting from a random admissible assembly, the algorithm iteratively swaps or replaces items, accepting changes that lower energy while occasionally allowing higher-energy solutions early on to avoid local optima. As the temperature decreases, the search converges on the lowest-energy arrangement, yielding FC blocks that satisfy the psychometric constraints. For a detailed tutorial, see <xref ref-type="bibr" rid="r25">Li et al. (2025)</xref>. Version 0.2.0.1002 of the <italic>autoFC</italic> package was used in this study.</p></sec>
<sec><title>Genetic Algorithm</title>
<p>Genetic algorithms are heuristic optimization methods inspired by principles of population genetics. <xref ref-type="bibr" rid="r20">Kreitchmann et al. (2022)</xref> adapted the GA for FCQ assembly to maximize the marginal reliability of selected blocks. In this approach, new candidate blocks are generated using a node histogram-based sampling algorithm, which constructs probabilistic models from the genotypes of previous generations. Each new genotype is formed in two steps: first, a portion of the parent genotype is directly passed to the offspring as a template; second, the remaining elements are sampled from a conditional probability distribution capturing dependencies observed in prior generations, with a mutation factor added as noise. Candidates are then evaluated against their parents based on constraint compliance and the objective function, which is to maximize block reliability, and the best candidates advance to the next generation. Block content constraints are represented in a <inline-formula><mml:math id="m24"><mml:mrow><mml:mi>J</mml:mi><mml:mo>×</mml:mo><mml:mi>J</mml:mi></mml:mrow></mml:math></inline-formula> matrix (<bold>C</bold>), where each cell is 1 if a pair of items can be combined into a block based on content criteria and 0 if not. The node histogram records the frequency of item selection within a generation. This iterative process continues until convergence, producing FCQ blocks that satisfy psychometric constraints and maximize reliability. More details on the algorithm can be found in <xref ref-type="bibr" rid="r20">Kreitchmann et al. (2022)</xref>.</p></sec></sec></sec>
<sec><title>The Present Study</title>
<p>To date, there has been no comprehensive evaluation of existing methods for the optimal assembly of FCQs. The purpose of this study is to compare four such methods: the GA; two variants of the SA algorithm, one blueprint-based and the other optimizing differences in the <inline-formula><mml:math id="m25"><mml:mrow><mml:msub><mml:mi>a</mml:mi><mml:mi>j</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula> parameter; and a brute-force (BF) search. The comparison focuses on the reliability of the assembled questionnaires’ scores and on computational cost. The two SA variants differ in their optimization criterion: the former employs the basic functionality of the package, incorporating only content and SDM constraints without considering item parameters, whereas the latter adds a criterion that seeks to maximize differences in the scale parameters within each item pair. This study makes a novel contribution by incorporating SD constraints into GA and exploring how the relationship between SD and item parameters may affect the block assembly process. We conducted a simulation study evaluating the four methods and an empirical illustration. 
We hypothesize that: (1) GA will perform best in terms of reliability, as it uses the reliability of the assembled blocks as its objective function, followed in order by SA with scale parameter (<inline-formula><mml:math id="m26"><mml:mrow><mml:msub><mml:mi>a</mml:mi><mml:mi>j</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula>) optimization, SA blueprint, and BF; (2) there should be no significant differences associated with the inclusion or exclusion of heteropolar blocks; (3) a higher correlation of <inline-formula><mml:math id="m27"><mml:mrow><mml:msub><mml:mi>a</mml:mi><mml:mi>j</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula> with <inline-formula><mml:math id="m28"><mml:mrow><mml:mi>S</mml:mi><mml:msub><mml:mi>D</mml:mi><mml:mi>j</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula> is expected to reduce performance; and (4) SDM will negatively affect trait recovery. These hypotheses were not preregistered.</p></sec></sec>
<sec sec-type="methods"><title>Method</title>
<sec><title>Simulation Design and Data Generation</title>
<p>Three factors were systematically manipulated in the simulation study. For clarity, we categorize these factors into two groups, block factors, which relate to the FCQ construction, and an item factor, which pertains to the construction of the item banks. The block factors are: 1) questionnaire length (40, 80), and 2) use of SDM (Yes, No). The item factor is: 3) degree of correlation between scale parameter, <inline-formula><mml:math id="m29"><mml:mrow><mml:msub><mml:mi>a</mml:mi><mml:mi>j</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula>, and <inline-formula><mml:math id="m30"><mml:mrow><mml:mi>S</mml:mi><mml:msub><mml:mi>D</mml:mi><mml:mi>j</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula> of the positively keyed bank, which relates to the types of blocks formed (homopolar vs. heteropolar), and results in four levels (<inline-formula><mml:math id="m31"><mml:mrow><mml:msub><mml:mi>r</mml:mi><mml:mrow><mml:msubsup><mml:mi>a</mml:mi><mml:mi>j</mml:mi><mml:mo>+</mml:mo></mml:msubsup><mml:mo>−</mml:mo><mml:mi>S</mml:mi><mml:msubsup><mml:mi>D</mml:mi><mml:mi>j</mml:mi><mml:mo>+</mml:mo></mml:msubsup></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mn>0.20</mml:mn></mml:mrow></mml:math></inline-formula> with 25% heteropolar blocks, <inline-formula><mml:math id="m32"><mml:mrow><mml:msub><mml:mi>r</mml:mi><mml:mrow><mml:msubsup><mml:mi>a</mml:mi><mml:mi>j</mml:mi><mml:mo>+</mml:mo></mml:msubsup><mml:mo>−</mml:mo><mml:mi>S</mml:mi><mml:msubsup><mml:mi>D</mml:mi><mml:mi>j</mml:mi><mml:mo>+</mml:mo></mml:msubsup></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mn>0.20</mml:mn></mml:mrow></mml:math></inline-formula> with 0% heteropolar blocks, <inline-formula><mml:math 
id="m33"><mml:mrow><mml:msub><mml:mi>r</mml:mi><mml:mrow><mml:msubsup><mml:mi>a</mml:mi><mml:mi>j</mml:mi><mml:mo>+</mml:mo></mml:msubsup><mml:mo>−</mml:mo><mml:mi>S</mml:mi><mml:msubsup><mml:mi>D</mml:mi><mml:mi>j</mml:mi><mml:mo>+</mml:mo></mml:msubsup></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mn>0.50</mml:mn></mml:mrow></mml:math></inline-formula> with 0% heteropolar blocks, <inline-formula><mml:math id="m34"><mml:mrow><mml:msub><mml:mi>r</mml:mi><mml:mrow><mml:msubsup><mml:mi>a</mml:mi><mml:mi>j</mml:mi><mml:mo>+</mml:mo></mml:msubsup><mml:mo>−</mml:mo><mml:mi>S</mml:mi><mml:msubsup><mml:mi>D</mml:mi><mml:mi>j</mml:mi><mml:mo>+</mml:mo></mml:msubsup></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mn>0.80</mml:mn></mml:mrow></mml:math></inline-formula> with 0% heteropolar blocks). Hereafter, “+” indicates items that are positively keyed to the trait, and “-” indicates items that are negatively keyed. All factors are fully crossed, resulting in 16 conditions.</p>
<p>First, the questionnaire length factor determines how many pairwise blocks are formed. We established two levels, 80 and 40 blocks. Second, the SDM factor indicates whether item pairs were matched based on their SD ratings, with two levels, yes (matching applied) and no (matching not applied). When SDM was applied, the absolute difference between two items’ <inline-formula><mml:math id="m35"><mml:mrow><mml:mi>S</mml:mi><mml:msub><mml:mi>D</mml:mi><mml:mi>j</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula> ratings was calculated; if this difference exceeded a predefined cutoff, the items were not eligible for pairing. We established a cutoff of 0.5 for homopolar blocks to ensure a stricter level of matching. However, since it is more difficult to find heteropolar blocks that match in SD (<xref ref-type="bibr" rid="r13">Graña et al., 2025</xref>), the cutoff was relaxed to 0.75 for heteropolar blocks.</p>
<p>Since the third factor pertains to the generation of the item banks, we will describe the item bank generation process together. One five-dimensional bank of 320 SS items was generated for each condition and replication, so as to imitate personality item pools, such as the International Personality Item Pool (IPIP; <xref ref-type="bibr" rid="r12">Goldberg, 1999</xref>). Three prototypical item banks were created. The main difference among them lies in the degree of association between the scale parameter, <inline-formula><mml:math id="m36"><mml:mrow><mml:msub><mml:mi>a</mml:mi><mml:mi>j</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula>, and <inline-formula><mml:math id="m37"><mml:mrow><mml:mi>S</mml:mi><mml:msub><mml:mi>D</mml:mi><mml:mi>j</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula> of the positively keyed items. All banks were balanced with 64 items per trait; in the mixed keyed bank, each trait had 32 positive and 32 negative items. Each simulation condition used a separate item bank, corresponding to one of the three bank types described below.</p>
<p>The item banks differed in the degree of correlation between <inline-formula><mml:math id="m38"><mml:mrow><mml:msub><mml:mi>a</mml:mi><mml:mi>j</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula> and <inline-formula><mml:math id="m39"><mml:mrow><mml:mi>S</mml:mi><mml:msub><mml:mi>D</mml:mi><mml:mi>j</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula>, as well as in block polarity, resulting in four distinct categories (<xref ref-type="table" rid="t1">Table 1</xref>). Bank 1 was used for Categories 1 and 2 (and Levels 1 and 2 of the third simulation factor). It included both positively keyed items and negatively keyed items, with a small correlation between <inline-formula><mml:math id="m40"><mml:mrow><mml:msub><mml:mi>a</mml:mi><mml:mi>j</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula> and <inline-formula><mml:math id="m41"><mml:mrow><mml:mi>S</mml:mi><mml:msub><mml:mi>D</mml:mi><mml:mi>j</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula> for both types (<inline-formula><mml:math id="m42"><mml:mrow><mml:msub><mml:mi>r</mml:mi><mml:mrow><mml:msubsup><mml:mi>a</mml:mi><mml:mi>j</mml:mi><mml:mo>+</mml:mo></mml:msubsup><mml:mo>−</mml:mo><mml:mi>S</mml:mi><mml:msubsup><mml:mi>D</mml:mi><mml:mi>j</mml:mi><mml:mo>+</mml:mo></mml:msubsup></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:msub><mml:mi>r</mml:mi><mml:mrow><mml:msubsup><mml:mi>a</mml:mi><mml:mi>j</mml:mi><mml:mo>−</mml:mo></mml:msubsup><mml:mo>−</mml:mo><mml:mi>S</mml:mi><mml:msubsup><mml:mi>D</mml:mi><mml:mi>j</mml:mi><mml:mo>−</mml:mo></mml:msubsup></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mn>0.20</mml:mn></mml:mrow></mml:math></inline-formula>) and a naturally high correlation across keys (<inline-formula><mml:math 
id="m43"><mml:mrow><mml:msub><mml:mi>r</mml:mi><mml:mrow><mml:msub><mml:mi>a</mml:mi><mml:mi>j</mml:mi></mml:msub><mml:mo>−</mml:mo><mml:mi>S</mml:mi><mml:msub><mml:mi>D</mml:mi><mml:mi>j</mml:mi></mml:msub></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mn>0.86</mml:mn></mml:mrow></mml:math></inline-formula>). Using this bank, Category 1 formed a FCQ with 25% heteropolar blocks, while Category 2 included only homopolar blocks (0% heteropolar).</p>
<table-wrap id="t1" position="anchor" orientation="landscape">
<label>Table 1</label><caption><title>Parameter Simulation Specifications</title></caption>
<table frame="hsides" rules="groups">
<col width="" align="left"/>
<col width=""/>
<col width=""/>
<col width=""/>
<col width=""/>
<thead>
<tr>
<th/>
<th colspan="2" scope="colgroup">Bank 1<hr/></th>
<th>Bank 2<hr/></th>
<th>Bank 3<hr/></th>
</tr>
<tr>
<th valign="bottom">Parameter</th>	
<th scope="colgroup">(+)</th>
<th>(-)</th>
<th>(+)</th>
<th>(+)</th>
</tr>
</thead>
<tbody>
<tr>
<td>	<inline-formula><mml:math id="m44"><mml:mrow><mml:msub><mml:mi>a</mml:mi><mml:mi>j</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula></td>
<td><italic>N</italic>(1.5,0.5)</td>
<td><italic>N</italic>(-1.5,0.5)</td>
<td><italic>N</italic>(1.5,0.5) truncated at 0</td>
<td><italic>N</italic>(1.5,0.5) truncated at 0</td>
</tr>
<tr>
<td>	<inline-formula><mml:math id="m45"><mml:mrow><mml:msub><mml:mi>d</mml:mi><mml:mi>j</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula></td>
<td><italic>N</italic>(-0.5,0.8) truncated at -3</td>
<td><italic>N</italic>(-1,0.8) truncated at -3</td>
<td><italic>N</italic>(-0.5,0.8)</td>
<td><italic>N</italic>(-0.5,0.8)</td>
</tr>
<tr>
<td>	<inline-formula><mml:math id="m46"><mml:mrow><mml:mi>S</mml:mi><mml:msub><mml:mi>D</mml:mi><mml:mi>j</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula></td>
<td><italic>N</italic>(4,0.5) truncated at 1 and 5</td>
<td><italic>N</italic>(2,0.5) truncated at 1 and 5</td>
<td><italic>N</italic>(4,0.5) truncated at 1 and 5</td>
<td><italic>N</italic>(4,0.5) truncated at 1 and 5</td>
</tr>
<tr>
<td>	<inline-formula><mml:math id="m47"><mml:mrow><mml:msub><mml:mi>r</mml:mi><mml:mrow><mml:msub><mml:mi>a</mml:mi><mml:mi>j</mml:mi></mml:msub><mml:mo>−</mml:mo><mml:mi>S</mml:mi><mml:msub><mml:mi>D</mml:mi><mml:mi>j</mml:mi></mml:msub></mml:mrow></mml:msub></mml:mrow></mml:math></inline-formula></td>
<td align="char" char=".">.20</td>
<td align="char" char=".">.20</td>
<td align="char" char=".">.50</td>
<td align="char" char=".">.80</td>
</tr>
<tr>
<td>	<inline-formula><mml:math id="m48"><mml:mrow><mml:msub><mml:mi>r</mml:mi><mml:mrow><mml:msub><mml:mi>a</mml:mi><mml:mi>j</mml:mi></mml:msub><mml:mo>−</mml:mo><mml:msub><mml:mi>d</mml:mi><mml:mi>j</mml:mi></mml:msub></mml:mrow></mml:msub></mml:mrow></mml:math></inline-formula></td>
<td align="char" char=".">.30</td>
<td align="char" char=".">-.30</td>
<td align="char" char=".">.30</td>
<td align="char" char=".">.30</td>
</tr>
<tr>
<td><inline-formula><mml:math id="m49"><mml:mrow><mml:msub><mml:mi>r</mml:mi><mml:mrow><mml:msub><mml:mi>d</mml:mi><mml:mi>j</mml:mi></mml:msub><mml:mo>−</mml:mo><mml:mi>S</mml:mi><mml:msub><mml:mi>D</mml:mi><mml:mi>j</mml:mi></mml:msub></mml:mrow></mml:msub></mml:mrow></mml:math></inline-formula></td>
<td align="char" char=".">.00</td>
<td align="char" char=".">.00</td>
<td align="char" char=".">.00</td>
<td align="char" char=".">.00</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<p><italic>Note</italic>. <inline-formula><mml:math id="m50"><mml:mrow><mml:msub><mml:mi>a</mml:mi><mml:mi>j</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula> = scale parameter; <inline-formula><mml:math id="m51"><mml:mrow><mml:msub><mml:mi>d</mml:mi><mml:mi>j</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula> = block intercept parameter; <inline-formula><mml:math id="m52"><mml:mrow><mml:mi>S</mml:mi><mml:msub><mml:mi>D</mml:mi><mml:mi>j</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula> = social desirability of each item; <inline-formula><mml:math id="m53"><mml:mrow><mml:msub><mml:mi>r</mml:mi><mml:mrow><mml:msub><mml:mi>a</mml:mi><mml:mi>j</mml:mi></mml:msub><mml:mo>−</mml:mo><mml:mi>S</mml:mi><mml:msub><mml:mi>D</mml:mi><mml:mi>j</mml:mi></mml:msub></mml:mrow></mml:msub></mml:mrow></mml:math></inline-formula> = correlation between <inline-formula><mml:math id="m54"><mml:mrow><mml:msub><mml:mi>a</mml:mi><mml:mi>j</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula> and <inline-formula><mml:math id="m55"><mml:mrow><mml:mi>S</mml:mi><mml:msub><mml:mi>D</mml:mi><mml:mi>j</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula>; <inline-formula><mml:math id="m56"><mml:mrow><mml:msub><mml:mi>r</mml:mi><mml:mrow><mml:msub><mml:mi>a</mml:mi><mml:mi>j</mml:mi></mml:msub><mml:mo>−</mml:mo><mml:msub><mml:mi>d</mml:mi><mml:mi>j</mml:mi></mml:msub></mml:mrow></mml:msub></mml:mrow></mml:math></inline-formula> = correlation between <inline-formula><mml:math id="m57"><mml:mrow><mml:msub><mml:mi>a</mml:mi><mml:mi>j</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula> and <inline-formula><mml:math id="m58"><mml:mrow><mml:msub><mml:mi>d</mml:mi><mml:mi>j</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula>; <inline-formula><mml:math 
id="m59"><mml:mrow><mml:msub><mml:mi>r</mml:mi><mml:mrow><mml:msub><mml:mi>d</mml:mi><mml:mi>j</mml:mi></mml:msub><mml:mo>−</mml:mo><mml:mi>S</mml:mi><mml:msub><mml:mi>D</mml:mi><mml:mi>j</mml:mi></mml:msub></mml:mrow></mml:msub></mml:mrow></mml:math></inline-formula> = correlation between <inline-formula><mml:math id="m60"><mml:mrow><mml:msub><mml:mi>d</mml:mi><mml:mi>j</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula> and <inline-formula><mml:math id="m61"><mml:mrow><mml:mi>S</mml:mi><mml:msub><mml:mi>D</mml:mi><mml:mi>j</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula>.</p>
</table-wrap-foot>
</table-wrap>
<p>Bank 1, which reflects a more realistic scenario, is expected to pose less difficulty in assembling reliable tests due to the low correlation between <inline-formula><mml:math id="m62"><mml:mrow><mml:msub><mml:mi>a</mml:mi><mml:mi>j</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula> and <inline-formula><mml:math id="m63"><mml:mrow><mml:mi>S</mml:mi><mml:msub><mml:mi>D</mml:mi><mml:mi>j</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula>. To explore more challenging conditions, we included two additional item banks. Levels 3 and 4 of the third simulation factor (and Categories 3 and 4) correspond to Banks 2 and 3, respectively; both consist of positively keyed items and therefore can form only homopolar blocks. Bank 2 was generated with a moderate correlation between the scale parameter, <inline-formula><mml:math id="m64"><mml:mrow><mml:msub><mml:mi>a</mml:mi><mml:mi>j</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula>, and <inline-formula><mml:math id="m65"><mml:mrow><mml:mi>S</mml:mi><mml:msub><mml:mi>D</mml:mi><mml:mi>j</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula> (<inline-formula><mml:math id="m66"><mml:mrow><mml:msub><mml:mi>r</mml:mi><mml:mrow><mml:msubsup><mml:mi>a</mml:mi><mml:mi>j</mml:mi><mml:mo>+</mml:mo></mml:msubsup><mml:mo>−</mml:mo><mml:mi>S</mml:mi><mml:msubsup><mml:mi>D</mml:mi><mml:mi>j</mml:mi><mml:mo>+</mml:mo></mml:msubsup></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mn>0.50</mml:mn></mml:mrow></mml:math></inline-formula>), while Bank 3 has a high correlation (<inline-formula><mml:math id="m67"><mml:mrow><mml:msub><mml:mi>r</mml:mi><mml:mrow><mml:msubsup><mml:mi>a</mml:mi><mml:mi>j</mml:mi><mml:mo>+</mml:mo></mml:msubsup><mml:mo>−</mml:mo><mml:mi>S</mml:mi><mml:msubsup><mml:mi>D</mml:mi><mml:mi>j</mml:mi><mml:mo>+</mml:mo></mml:msubsup></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mn>0.80</mml:mn></mml:mrow></mml:math></inline-formula>), making optimal block matching more difficult. 
The <inline-formula><mml:math id="m68"><mml:mrow><mml:mi>S</mml:mi><mml:msub><mml:mi>D</mml:mi><mml:mi>j</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula> values were sampled this way to represent that, in real contexts, when traits are scored in the socially desirable direction (e.g., conscientiousness, emotional stability), positively keyed items tend to have higher desirability. The values in these distributions were primarily based on the empirical distributions of <inline-formula><mml:math id="m69"><mml:mrow><mml:msub><mml:mi>a</mml:mi><mml:mi>j</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula>, <inline-formula><mml:math id="m70"><mml:mrow><mml:msub><mml:mi>d</mml:mi><mml:mi>j</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula>, and <inline-formula><mml:math id="m71"><mml:mrow><mml:mi>S</mml:mi><mml:msub><mml:mi>D</mml:mi><mml:mi>j</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula> reported in the publicly available datasets from <xref ref-type="bibr" rid="r18">Johnson (2014)</xref> and <xref ref-type="bibr" rid="r16">Hughes et al. (2021)</xref>, while also considering that positively keyed items tend to exhibit higher SD values than negatively keyed items (<xref ref-type="bibr" rid="r13">Graña et al., 2025</xref>; <xref ref-type="bibr" rid="r25">Li et al., 2025</xref>). Under our scoring convention, positively keyed items for each trait were defined as the socially desirable direction (e.g., for items measuring Neuroticism, a positively keyed item has a lower SD rating). These choices were made to represent realistic item parameters and SD behavior. 
This also includes the correlation between <inline-formula><mml:math id="m72"><mml:mrow><mml:msub><mml:mi>a</mml:mi><mml:mi>j</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula> and <inline-formula><mml:math id="m73"><mml:mrow><mml:msub><mml:mi>d</mml:mi><mml:mi>j</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula> (approximately 0.30 for both positively and negatively keyed items). The correlation between <inline-formula><mml:math id="m74"><mml:mrow><mml:msub><mml:mi>d</mml:mi><mml:mi>j</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula> and <inline-formula><mml:math id="m75"><mml:mrow><mml:mi>S</mml:mi><mml:msub><mml:mi>D</mml:mi><mml:mi>j</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula> was kept at zero so that <inline-formula><mml:math id="m76"><mml:mrow><mml:mi>S</mml:mi><mml:msub><mml:mi>D</mml:mi><mml:mi>j</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula> would only be linked to <inline-formula><mml:math id="m77"><mml:mrow><mml:msub><mml:mi>a</mml:mi><mml:mi>j</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula>, allowing us to analyze the impact of this variable.</p><?table t1?>
	<p>The structure of the simulation is as follows. First, a SS item bank was generated for each condition and replication. Second, a FCQ was constructed with each algorithm using the SS parameters. Then, a binary FC response dataset of 5,000 respondents was simulated using the MUPP-2PL for each assembled FCQ. Finally, the MUPP-2PL was estimated, and trait recovery was assessed for each questionnaire. Trait estimates (<inline-formula><mml:math id="m78"><mml:mover accent="true"><mml:mi>θ</mml:mi><mml:mo>^</mml:mo></mml:mover></mml:math></inline-formula>) were obtained as maximum a posteriori scores with the Metropolis-Hastings Robbins-Monro algorithm. These analyses were conducted using the <italic>mirt</italic> R package (<xref ref-type="bibr" rid="r6">Chalmers, 2012</xref>). This process was replicated 50 times. We controlled for the following content constraints: (1) block multidimensionality (i.e., each block had to include items measuring two different traits), and (2) trait balance across the selected blocks (i.e., each trait had to be represented by the same number of items). Item repetition was not allowed. During FCQ assembly, we verified that content and polarity constraints were met and that the distribution of blocks and items across traits remained generally balanced, allowing a fair comparison of the algorithms. Additionally, we conducted simulation checks to evaluate whether any of the resulting scale parameter matrices after FCQ assembly were rank-deficient by analyzing the smallest singular value of such matrices. Across all conditions, assembly algorithms, and replications, the smallest singular value was strictly greater than zero, indicating that none of the matrices were rank-deficient. We recorded the time spent in seconds for each assembly algorithm. In the case of BF, 100 questionnaires that met the specified constraints and SD requirements were randomly formed and the one with the highest reliability was selected. 
The R code used for the analysis and the empirical study can be found at <xref ref-type="bibr" rid="r34">Sorrel et al. (2026)</xref>. The repository also includes a document detailing all algorithm specifications. All procedures were executed with a 2.50 GHz Intel Core i9-11900 CPU and 32 GB of RAM.</p>
<sec><title>Measures of Trait Recovery</title>
	<p>To compare the assembly methods, the main dependent variable was trait score recovery. Specifically, we computed for each replication and trait: (1) the true reliability, calculated using the squared correlation between estimated and true <inline-formula><mml:math id="m79"><mml:mi>θ</mml:mi></mml:math></inline-formula> (<inline-formula><mml:math id="m80"><mml:mrow><mml:msubsup><mml:mi>ρ</mml:mi><mml:mrow><mml:mover accent="true"><mml:mi>θ</mml:mi><mml:mo>^</mml:mo></mml:mover><mml:mi>θ</mml:mi></mml:mrow><mml:mn>2</mml:mn></mml:msubsup></mml:mrow></mml:math></inline-formula>); (2) the root mean square error of <inline-formula><mml:math id="m81"><mml:mover accent="true"><mml:mi>θ</mml:mi><mml:mo>^</mml:mo></mml:mover></mml:math></inline-formula>, both overall (<inline-formula><mml:math id="m82"><mml:mrow><mml:msub><mml:mrow><mml:mtext>RMSE</mml:mtext></mml:mrow><mml:mover accent="true"><mml:mi>θ</mml:mi><mml:mo>^</mml:mo></mml:mover></mml:msub></mml:mrow></mml:math></inline-formula>; <xref ref-type="disp-formula" rid="e___2">Equation 3</xref>) and conditional on the true <inline-formula><mml:math id="m83"><mml:mi>θ</mml:mi></mml:math></inline-formula> (<inline-formula><mml:math id="m84"><mml:mrow><mml:msub><mml:mrow><mml:mtext>RMSE</mml:mtext></mml:mrow><mml:mover accent="true"><mml:mi>θ</mml:mi><mml:mo>^</mml:mo></mml:mover></mml:msub><mml:mfenced><mml:mi>θ</mml:mi></mml:mfenced></mml:mrow></mml:math></inline-formula>). The conditional <inline-formula><mml:math id="m85"><mml:mrow><mml:msub><mml:mrow><mml:mtext>RMSE</mml:mtext></mml:mrow><mml:mover accent="true"><mml:mi>θ</mml:mi><mml:mo>^</mml:mo></mml:mover></mml:msub></mml:mrow></mml:math></inline-formula> was calculated within intervals of the true <inline-formula><mml:math id="m86"><mml:mi>θ</mml:mi></mml:math></inline-formula> to examine how estimation accuracy varies across the latent continuum. 
Individuals were grouped into bins of 0.5 spanning from -2 to 2, and within each bin the RMSE was computed from the squared estimation errors of all individuals in that group. Additionally, an indicator of ipsativity was included, consisting of (3) the average trait correlation bias (<inline-formula><mml:math id="m87"><mml:mrow><mml:msub><mml:mrow><mml:mtext>Bias</mml:mtext></mml:mrow><mml:mover accent="true"><mml:mi>Φ</mml:mi><mml:mo>^</mml:mo></mml:mover></mml:msub></mml:mrow></mml:math></inline-formula>; <xref ref-type="disp-formula" rid="e___3">Equation 4</xref>).</p><disp-formula id="e___2"><label>3</label>	<mml:math id="m88"><mml:mrow><mml:mtable equalrows="true" equalcolumns="true"><mml:mtr><mml:mtd><mml:mrow><mml:msub><mml:mrow><mml:mtext>RMSE</mml:mtext></mml:mrow><mml:mover accent="true"><mml:mi>θ</mml:mi><mml:mo>^</mml:mo></mml:mover></mml:msub><mml:mo>=</mml:mo><mml:msqrt><mml:mrow><mml:mfrac><mml:mn>1</mml:mn><mml:mi>S</mml:mi></mml:mfrac><mml:msubsup><mml:mstyle mathsize="140%" displaystyle="true"><mml:mo>∑</mml:mo></mml:mstyle><mml:mrow><mml:mi>s</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mi>S</mml:mi></mml:msubsup><mml:msup><mml:mrow><mml:mfenced><mml:mrow><mml:mover accent="true"><mml:msub><mml:mi>θ</mml:mi><mml:mi>s</mml:mi></mml:msub><mml:mo>^</mml:mo></mml:mover><mml:mo>−</mml:mo><mml:msub><mml:mi>θ</mml:mi><mml:mi>s</mml:mi></mml:msub></mml:mrow></mml:mfenced></mml:mrow><mml:mn>2</mml:mn></mml:msup></mml:mrow></mml:msqrt></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:mrow></mml:math></disp-formula><disp-formula id="e___3"><label>4</label>	<mml:math id="m89"><mml:mrow><mml:mtable equalrows="true" equalcolumns="true"><mml:mtr><mml:mtd><mml:mrow><mml:msub><mml:mrow><mml:mtext>Bias</mml:mtext></mml:mrow><mml:mover accent="true"><mml:mi>Φ</mml:mi><mml:mo>^</mml:mo></mml:mover></mml:msub><mml:mo>=</mml:mo><mml:mover 
accent="true"><mml:mi>Φ</mml:mi><mml:mo>^</mml:mo></mml:mover><mml:mo>−</mml:mo><mml:mi>Φ</mml:mi><mml:mo>,</mml:mo><mml:mo> </mml:mo></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:mrow></mml:math></disp-formula>
<p>where <inline-formula><mml:math id="m90"><mml:mover accent="true"><mml:mi>Φ</mml:mi><mml:mo>^</mml:mo></mml:mover></mml:math></inline-formula> and <inline-formula><mml:math id="m91"><mml:mi>Φ</mml:mi></mml:math></inline-formula> are the estimated and true trait correlation matrices, respectively. We used the real-world correlations from the NEO-PI-R (<xref ref-type="bibr" rid="r9">Costa &amp; McCrae, 1992</xref>). In the case of fully ipsative scores, a negative bias of <inline-formula><mml:math id="m92"><mml:mrow><mml:mo>−</mml:mo><mml:mn>1</mml:mn><mml:mo>/</mml:mo><mml:mfenced><mml:mrow><mml:mi>D</mml:mi><mml:mo>−</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:mfenced></mml:mrow></mml:math></inline-formula> would be expected for <inline-formula><mml:math id="m93"><mml:mover accent="true"><mml:mi>Φ</mml:mi><mml:mo>^</mml:mo></mml:mover></mml:math></inline-formula>, with <italic>D</italic> representing the number of traits (<xref ref-type="bibr" rid="r14">Hicks, 1970</xref>). Dependent Variables (1) and (2) were calculated separately for each trait and then averaged across all five traits, whereas (3) was calculated by extracting the non-diagonal elements of <inline-formula><mml:math id="m94"><mml:mover accent="true"><mml:mi>Φ</mml:mi><mml:mo>^</mml:mo></mml:mover></mml:math></inline-formula> and <inline-formula><mml:math id="m95"><mml:mi>Φ</mml:mi></mml:math></inline-formula>, applying Fisher’s Z-transformation to each correlation, computing the Z-differences, averaging these differences, and then back-transforming the average to the correlation metric (<xref ref-type="bibr" rid="r8">Corey et al., 1998</xref>). 
Results of the overall <inline-formula><mml:math id="m96"><mml:mrow><mml:msub><mml:mrow><mml:mtext>RMSE</mml:mtext></mml:mrow><mml:mover accent="true"><mml:mi>θ</mml:mi><mml:mo>^</mml:mo></mml:mover></mml:msub></mml:mrow></mml:math></inline-formula> and of the four-way univariate analyses of variance (ANOVA) for each dependent variable, where algorithm was treated as a within-condition factor and the simulation conditions as between-condition factors, can be found in Tables S2 and S3 in the Supplementary Material (see <xref ref-type="bibr" rid="r10">Escudero et al., 2026</xref>). Partial eta-squared (<inline-formula><mml:math id="m97"><mml:mrow><mml:msubsup><mml:mi>η</mml:mi><mml:mi>p</mml:mi><mml:mn>2</mml:mn></mml:msubsup></mml:mrow></mml:math></inline-formula>) values higher than .14 were considered as relevant effects (<xref ref-type="bibr" rid="r7">Cohen, 1988</xref>). Results for <inline-formula><mml:math id="m98"><mml:mrow><mml:msub><mml:mrow><mml:mtext>RMSE</mml:mtext></mml:mrow><mml:mover accent="true"><mml:mi>θ</mml:mi><mml:mo>^</mml:mo></mml:mover></mml:msub></mml:mrow></mml:math></inline-formula> (shown in Figure S1, <xref ref-type="bibr" rid="r10">Escudero et al., 2026</xref>) are omitted from the main text, as the conclusions are the same as for <inline-formula><mml:math id="m99"><mml:mrow><mml:msubsup><mml:mi>ρ</mml:mi><mml:mrow><mml:mover accent="true"><mml:mi>θ</mml:mi><mml:mo>^</mml:mo></mml:mover><mml:mi>θ</mml:mi></mml:mrow><mml:mn>2</mml:mn></mml:msubsup></mml:mrow></mml:math></inline-formula>. Therefore, we focus on the latter, which is a more commonly used metric. ANOVA results were used to guide the interpretation of the findings.</p></sec></sec></sec>
<sec sec-type="results"><title>Results</title>
<sec><title>Algorithm Efficiency</title>
<p>The GA is notably influenced by the questionnaire length, with longer questionnaires resulting in increased duration, as shown in Table S1 in the Supplementary Material (see <xref ref-type="bibr" rid="r10">Escudero et al., 2026</xref>). Assembling questionnaires of 80 and 40 blocks took an average of 4.90 and 3.21 minutes, respectively. In contrast, the other algorithms show minimal sensitivity to questionnaire length. Both SA methods are typically completed in a few seconds. However, SA has higher skewness and kurtosis, as we implemented an iterative process that reruns the algorithm until the target design is achieved. Across all conditions and replications, the constraint on the number of heteropolar blocks was always satisfied. In a small number of replications of the SA methods, however, a few blocks did not meet the SDM constraint. Out of the 400 replications with SDM, there were 28 with 1 block affected and 2 with 2 blocks affected for the SA blueprint, and SA with scale parameter <inline-formula><mml:math id="m100"><mml:mrow><mml:msub><mml:mi>a</mml:mi><mml:mi>j</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula> optimization showed 54 with 1 block affected and 1 with 2 blocks affected. The small number of affected blocks (1 or 2 out of 40 or 80) suggests that the impact is negligible. In any case, this implies a potential advantage in reliability and a disadvantage in ipsativity for these replications.</p></sec>
<sec><title>Recovery of Trait Parameters</title>
<p><xref ref-type="table" rid="t2">Table 2</xref> presents the marginal results for the <inline-formula><mml:math id="m101"><mml:mrow><mml:msubsup><mml:mi>ρ</mml:mi><mml:mrow><mml:mover accent="true"><mml:mi>θ</mml:mi><mml:mo>^</mml:mo></mml:mover><mml:mi>θ</mml:mi></mml:mrow><mml:mn>2</mml:mn></mml:msubsup></mml:mrow></mml:math></inline-formula> and <inline-formula><mml:math id="m102"><mml:mrow><mml:msub><mml:mrow><mml:mtext>Bias</mml:mtext></mml:mrow><mml:mover accent="true"><mml:mi>Φ</mml:mi><mml:mo>^</mml:mo></mml:mover></mml:msub></mml:mrow></mml:math></inline-formula> of the four assembly methods. GA consistently yields the highest reliability (<inline-formula><mml:math id="m103"><mml:mrow><mml:mi>M</mml:mi><mml:mo>=</mml:mo><mml:mn>.80</mml:mn></mml:mrow></mml:math></inline-formula>), followed by SA with <inline-formula><mml:math id="m104"><mml:mrow><mml:msub><mml:mi>a</mml:mi><mml:mi>j</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula> optimization (<inline-formula><mml:math id="m105"><mml:mrow><mml:mi>M</mml:mi><mml:mo>=</mml:mo><mml:mn>.78</mml:mn></mml:mrow></mml:math></inline-formula>), BF (<inline-formula><mml:math id="m106"><mml:mrow><mml:mi>M</mml:mi><mml:mo>=</mml:mo><mml:mn>.76</mml:mn></mml:mrow></mml:math></inline-formula>), and SA blueprint (<inline-formula><mml:math id="m107"><mml:mrow><mml:mi>M</mml:mi><mml:mo>=</mml:mo><mml:mn>.73</mml:mn></mml:mrow></mml:math></inline-formula>). These differences indicate a large effect size (<inline-formula><mml:math id="m108"><mml:mrow><mml:msubsup><mml:mi>η</mml:mi><mml:mi>p</mml:mi><mml:mn>2</mml:mn></mml:msubsup><mml:mo>=</mml:mo><mml:mn>.88</mml:mn></mml:mrow></mml:math></inline-formula>). All simulation conditions are relevant, as seen in <xref ref-type="fig" rid="f2">Figure 2</xref>. 
The most relevant factor was the questionnaire length, which can be expected, as longer questionnaires enhance reliability (<inline-formula><mml:math id="m109"><mml:mrow><mml:msubsup><mml:mi>η</mml:mi><mml:mi>p</mml:mi><mml:mn>2</mml:mn></mml:msubsup><mml:mo>=</mml:mo><mml:mn>.98</mml:mn></mml:mrow></mml:math></inline-formula>). Shorter tests show lower reliability overall, with the same relative patterns across correlations, heteropolar proportions, and algorithms. For instance, controlling SD at <inline-formula><mml:math id="m110"><mml:mrow><mml:msub><mml:mi>r</mml:mi><mml:mrow><mml:msubsup><mml:mi>a</mml:mi><mml:mi>j</mml:mi><mml:mo>+</mml:mo></mml:msubsup><mml:mo>−</mml:mo><mml:mi>S</mml:mi><mml:msubsup><mml:mi>D</mml:mi><mml:mi>j</mml:mi><mml:mo>+</mml:mo></mml:msubsup></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mn>.80</mml:mn></mml:mrow></mml:math></inline-formula> with 0% heteropolar blocks, GA’s reliability is .71 for Length 40, compared to .80 for Length 80. The next relevant factor was the degree of relation between the scale parameter, <inline-formula><mml:math id="m111"><mml:mrow><mml:msub><mml:mi>a</mml:mi><mml:mi>j</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula>, and <inline-formula><mml:math id="m112"><mml:mrow><mml:mi>S</mml:mi><mml:msub><mml:mi>D</mml:mi><mml:mi>j</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula> in the item bank and having heteropolar blocks or not (<inline-formula><mml:math id="m113"><mml:mrow><mml:msubsup><mml:mi>η</mml:mi><mml:mi>p</mml:mi><mml:mn>2</mml:mn></mml:msubsup><mml:mo>=</mml:mo><mml:mn>.95</mml:mn></mml:mrow></mml:math></inline-formula>). 
The highest reliability is achieved with mixed banks forming questionnaires containing 25% heteropolar blocks; for example, for Length 80, controlling for SD at <inline-formula><mml:math id="m114"><mml:mrow><mml:msub><mml:mi>r</mml:mi><mml:mrow><mml:msubsup><mml:mi>a</mml:mi><mml:mi>j</mml:mi><mml:mo>+</mml:mo></mml:msubsup><mml:mo>−</mml:mo><mml:mi>S</mml:mi><mml:msubsup><mml:mi>D</mml:mi><mml:mi>j</mml:mi><mml:mo>+</mml:mo></mml:msubsup></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mn>.20</mml:mn></mml:mrow></mml:math></inline-formula>, GA achieves .88 vs. .84 in the same condition with 0% heteropolar blocks. As the correlation between the scale parameter and <inline-formula><mml:math id="m115"><mml:mrow><mml:mi>S</mml:mi><mml:msub><mml:mi>D</mml:mi><mml:mi>j</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula> increases, reliability decreases. The same tendency can be seen in both test lengths. Specifically, the condition with the lowest reliability results is the positively keyed item bank with <inline-formula><mml:math id="m116"><mml:mrow><mml:msub><mml:mi>r</mml:mi><mml:mrow><mml:msubsup><mml:mi>a</mml:mi><mml:mi>j</mml:mi><mml:mo>+</mml:mo></mml:msubsup><mml:mo>−</mml:mo><mml:mi>S</mml:mi><mml:msubsup><mml:mi>D</mml:mi><mml:mi>j</mml:mi><mml:mo>+</mml:mo></mml:msubsup></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mn>.80</mml:mn></mml:mrow></mml:math></inline-formula> in the 40-block test length and it is where the most differences between algorithms are seen: GA = .71, <inline-formula><mml:math id="m117"><mml:mrow><mml:msub><mml:mrow><mml:mtext>SA</mml:mtext></mml:mrow><mml:mrow><mml:msub><mml:mtext>a</mml:mtext><mml:mtext>j</mml:mtext></mml:msub></mml:mrow></mml:msub></mml:mrow></mml:math></inline-formula> = .68, BF = .65, and <inline-formula><mml:math id="m118"><mml:mrow><mml:msub><mml:mrow><mml:mtext>SA</mml:mtext></mml:mrow><mml:mrow><mml:mtext>bp</mml:mtext></mml:mrow></mml:msub></mml:mrow></mml:math></inline-formula> = .60. 
Although the use of SDM had the smallest effect size among the factors examined (<inline-formula><mml:math id="m119"><mml:mrow><mml:msubsup><mml:mi>η</mml:mi><mml:mi>p</mml:mi><mml:mn>2</mml:mn></mml:msubsup><mml:mo>=</mml:mo><mml:mn>.53</mml:mn></mml:mrow></mml:math></inline-formula>), it still had a meaningful impact on reliability, consistently leading to lower values, due to the constraints it imposes on item pairing. Specifically, taking GA as an example, reliability was consistently lower when incorporating the desirability constraint, decreasing on average by .01 points compared to the same condition without considering SD, and reaching a decrease of up to .04 points in cases where there is a strong relationship between the scale parameter and <inline-formula><mml:math id="m120"><mml:mrow><mml:mi>S</mml:mi><mml:msub><mml:mi>D</mml:mi><mml:mi>j</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula>. The same pattern was observed for the other procedures. Notable interactions were: algorithm with length (<inline-formula><mml:math id="m121"><mml:mrow><mml:msubsup><mml:mi>η</mml:mi><mml:mi>p</mml:mi><mml:mn>2</mml:mn></mml:msubsup><mml:mo>=</mml:mo><mml:mn>.31</mml:mn></mml:mrow></mml:math></inline-formula>), algorithm with D-H (<inline-formula><mml:math id="m122"><mml:mrow><mml:msubsup><mml:mi>η</mml:mi><mml:mi>p</mml:mi><mml:mn>2</mml:mn></mml:msubsup><mml:mo>=</mml:mo><mml:mn>.54</mml:mn></mml:mrow></mml:math></inline-formula>), and SDM with D-H (<inline-formula><mml:math id="m123"><mml:mrow><mml:msubsup><mml:mi>η</mml:mi><mml:mi>p</mml:mi><mml:mn>2</mml:mn></mml:msubsup><mml:mo>=</mml:mo><mml:mn>.59</mml:mn></mml:mrow></mml:math></inline-formula>). 
These interactions further show that these factors are relevant in assembling FCQs: the choice of algorithm matters more when the correlation between <inline-formula><mml:math id="m124"><mml:mrow><mml:msub><mml:mi>a</mml:mi><mml:mi>j</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula> and <inline-formula><mml:math id="m125"><mml:mrow><mml:mi>S</mml:mi><mml:msub><mml:mi>D</mml:mi><mml:mi>j</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula> is higher and when no heteropolar blocks are formed, and this same factor (D-H) also interacts with SDM. In all cases, the ordering of the methods described above is preserved.</p>
<table-wrap id="t2" position="anchor" orientation="portrait">
<label>Table 2</label><caption><title>Average Trait Recovery Across 50 Replications for Questionnaires Assembled Using Each Algorithm in the Simulation Study</title></caption>
<table frame="hsides" rules="groups" style="compact-1; striped-#f3f3f3">
<col width="" align="left"/>
<col width=""/>
<col width=""/>
<col width=""/>
<col width=""/>
<col width=""/>
<col width=""/>
<col width=""/>
<col width=""/>
<col width=""/>
<col width=""/>
<thead>
<tr>
<th/>
<th/>
<th/>
<th colspan="4" scope="colgroup">	<inline-formula><mml:math id="m126"><mml:mrow><mml:msubsup><mml:mi>ρ</mml:mi><mml:mrow><mml:mover accent="true"><mml:mi>θ</mml:mi><mml:mo>^</mml:mo></mml:mover><mml:mi>θ</mml:mi></mml:mrow><mml:mn>2</mml:mn></mml:msubsup></mml:mrow></mml:math></inline-formula><hr/></th>
	<th colspan="4" scope="colgroup">	<inline-formula><mml:math id="m127"><mml:mrow><mml:msub><mml:mrow><mml:mtext>Bias</mml:mtext></mml:mrow><mml:mover accent="true"><mml:mi>Φ</mml:mi><mml:mo>^</mml:mo></mml:mover></mml:msub></mml:mrow></mml:math></inline-formula><hr/></th>
</tr>
<tr>
<th valign="bottom"><inline-formula><mml:math id="m128.01"><mml:mrow><mml:mtext fontweight="bold">Length</mml:mtext></mml:mrow></mml:math></inline-formula></th>
<th valign="bottom"><inline-formula><mml:math id="m128.02"><mml:mrow><mml:mtext fontweight="bold">D-H</mml:mtext></mml:mrow></mml:math></inline-formula></th>
<th valign="bottom"><inline-formula><mml:math id="m128.03"><mml:mrow><mml:mtext fontweight="bold">SDM</mml:mtext></mml:mrow></mml:math></inline-formula></th>
<th valign="bottom"><inline-formula><mml:math id="m128.04"><mml:mrow><mml:mtext fontweight="bold">GA</mml:mtext></mml:mrow></mml:math></inline-formula></th>
<th valign="bottom"><inline-formula><mml:math id="m128"><mml:mrow><mml:msub><mml:mrow><mml:mtext fontweight="bold">SA</mml:mtext></mml:mrow><mml:mrow><mml:msub><mml:mtext>a</mml:mtext><mml:mtext>j</mml:mtext></mml:msub></mml:mrow></mml:msub></mml:mrow></mml:math></inline-formula></th>
<th valign="bottom"><inline-formula><mml:math id="m128.05"><mml:mrow><mml:mtext fontweight="bold">BF</mml:mtext></mml:mrow></mml:math></inline-formula></th>
<th valign="bottom"><inline-formula><mml:math id="m128.25"><mml:mrow><mml:msub><mml:mrow><mml:mtext fontweight="bold">SA</mml:mtext></mml:mrow><mml:mrow><mml:mtext>bp</mml:mtext></mml:mrow></mml:msub></mml:mrow></mml:math></inline-formula></th>
<th valign="bottom"><inline-formula><mml:math id="m128.06"><mml:mrow><mml:mtext fontweight="bold">GA</mml:mtext></mml:mrow></mml:math></inline-formula></th>
<th valign="bottom"><inline-formula><mml:math id="m129"><mml:mrow><mml:msub><mml:mrow><mml:mtext fontweight="bold">SA</mml:mtext></mml:mrow><mml:mrow><mml:msub><mml:mtext>a</mml:mtext><mml:mtext>j</mml:mtext></mml:msub></mml:mrow></mml:msub></mml:mrow></mml:math></inline-formula></th>
<th valign="bottom"><inline-formula><mml:math id="m128.07"><mml:mrow><mml:mtext fontweight="bold">BF</mml:mtext></mml:mrow></mml:math></inline-formula></th>
<th valign="bottom"><inline-formula><mml:math id="m128.5"><mml:mrow><mml:msub><mml:mrow><mml:mtext fontweight="bold">SA</mml:mtext></mml:mrow><mml:mrow><mml:mtext>bp</mml:mtext></mml:mrow></mml:msub></mml:mrow></mml:math></inline-formula></th>
</tr>
</thead>
<tbody>
<tr>
<td>80</td>
<td><inline-formula><mml:math id="m130"><mml:mrow><mml:msub><mml:mi>r</mml:mi><mml:mrow><mml:msubsup><mml:mi>a</mml:mi><mml:mi>j</mml:mi><mml:mo>+</mml:mo></mml:msubsup><mml:mo>−</mml:mo><mml:mi>S</mml:mi><mml:msubsup><mml:mi>D</mml:mi><mml:mi>j</mml:mi><mml:mo>+</mml:mo></mml:msubsup></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mn>.20</mml:mn></mml:mrow></mml:math></inline-formula> with 25% het</td>
<td>Yes</td>
<td align="char" char="."><bold>.88</bold></td>
<td align="char" char=".">.87</td>
<td align="char" char=".">.87</td>
<td align="char" char=".">.86</td>
<td align="char" char=".">.01</td>
<td align="char" char=".">.01</td>
<td align="char" char=".">.01</td>
<td align="char" char="."><bold>.00</bold></td>
</tr>
<tr>
<td/>
	<td/>	
<td>No</td>
<td align="char" char="."><bold>.88</bold></td>
<td align="char" char=".">.87</td>
<td align="char" char=".">.87</td>
<td align="char" char=".">.86</td>
<td align="char" char="."><bold>.01</bold></td>
<td align="char" char="."><bold>.01</bold></td>
<td align="char" char="."><bold>.01</bold></td>
<td align="char" char="."><bold>.01</bold></td>
</tr>
<tr>
	<td/>	
<td><inline-formula><mml:math id="m131"><mml:mrow><mml:msub><mml:mi>r</mml:mi><mml:mrow><mml:msubsup><mml:mi>a</mml:mi><mml:mi>j</mml:mi><mml:mo>+</mml:mo></mml:msubsup><mml:mo>−</mml:mo><mml:mi>S</mml:mi><mml:msubsup><mml:mi>D</mml:mi><mml:mi>j</mml:mi><mml:mo>+</mml:mo></mml:msubsup></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mn>.20</mml:mn></mml:mrow></mml:math></inline-formula> with 0% het</td>
<td>Yes</td>
<td align="char" char="."><bold>.84</bold></td>
<td align="char" char=".">.82</td>
<td align="char" char=".">.81</td>
<td align="char" char=".">.78</td>
<td align="char" char="."><bold>-.05</bold></td>
<td align="char" char=".">-.07</td>
<td align="char" char=".">-.10</td>
<td align="char" char=".">-.14</td>
</tr>
<tr>
	<td/>
	<td/>	
<td>No</td>
<td align="char" char="."><bold>.84</bold></td>
<td align="char" char=".">.83</td>
<td align="char" char=".">.81</td>
<td align="char" char=".">.78</td>
<td align="char" char="."><bold>-.05</bold></td>
<td align="char" char=".">-.06</td>
<td align="char" char=".">-.10</td>
<td align="char" char=".">-.13</td>
</tr>
<tr>
	<td/>	
<td><inline-formula><mml:math id="m132"><mml:mrow><mml:msub><mml:mi>r</mml:mi><mml:mrow><mml:msubsup><mml:mi>a</mml:mi><mml:mi>j</mml:mi><mml:mo>+</mml:mo></mml:msubsup><mml:mo>−</mml:mo><mml:mi>S</mml:mi><mml:msubsup><mml:mi>D</mml:mi><mml:mi>j</mml:mi><mml:mo>+</mml:mo></mml:msubsup></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mn>.50</mml:mn></mml:mrow></mml:math></inline-formula> with 0% het</td>
<td>Yes</td>
<td align="char" char="."><bold>.83</bold></td>
<td align="char" char=".">.82</td>
<td align="char" char=".">.79</td>
<td align="char" char=".">.76</td>
<td align="char" char="."><bold>-.06</bold></td>
<td align="char" char=".">-.07</td>
<td align="char" char=".">-.12</td>
<td align="char" char=".">-.17</td>
</tr>
<tr>
	<td/>
	<td/>	
<td>No</td>
<td align="char" char="."><bold>.84</bold></td>
<td align="char" char=".">.83</td>
<td align="char" char=".">.81</td>
<td align="char" char=".">.78</td>
<td align="char" char="."><bold>-.05</bold></td>
<td align="char" char=".">-.06</td>
<td align="char" char=".">-.09</td>
<td align="char" char=".">-.14</td>
</tr>
<tr>
	<td/>	
<td><inline-formula><mml:math id="m133"><mml:mrow><mml:msub><mml:mi>r</mml:mi><mml:mrow><mml:msubsup><mml:mi>a</mml:mi><mml:mi>j</mml:mi><mml:mo>+</mml:mo></mml:msubsup><mml:mo>−</mml:mo><mml:mi>S</mml:mi><mml:msubsup><mml:mi>D</mml:mi><mml:mi>j</mml:mi><mml:mo>+</mml:mo></mml:msubsup></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mn>.80</mml:mn></mml:mrow></mml:math></inline-formula> with 0% het</td>
<td>Yes</td>
<td align="char" char="."><bold>.80</bold></td>
<td align="char" char="."><bold>.80</bold></td>
<td align="char" char=".">.74</td>
<td align="char" char=".">.71</td>
<td align="char" char="."><bold>-.10</bold></td>
<td align="char" char=".">-.11</td>
<td align="char" char=".">-.21</td>
<td align="char" char=".">-.26</td>
</tr>
<tr>
	<td/>
	<td/>	
<td>No</td>
<td align="char" char="."><bold>.83</bold></td>
<td align="char" char="."><bold>.83</bold></td>
<td align="char" char=".">.80</td>
<td align="char" char=".">.78</td>
<td align="char" char="."><bold>-.05</bold></td>
<td align="char" char=".">-.06</td>
<td align="char" char=".">-.10</td>
<td align="char" char=".">-.14</td>
</tr>
<tr>
<td>40</td>
<td><inline-formula><mml:math id="m134"><mml:mrow><mml:msub><mml:mi>r</mml:mi><mml:mrow><mml:msubsup><mml:mi>a</mml:mi><mml:mi>j</mml:mi><mml:mo>+</mml:mo></mml:msubsup><mml:mo>−</mml:mo><mml:mi>S</mml:mi><mml:msubsup><mml:mi>D</mml:mi><mml:mi>j</mml:mi><mml:mo>+</mml:mo></mml:msubsup></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mn>.20</mml:mn></mml:mrow></mml:math></inline-formula> with 25% het</td>
<td>Yes</td>
<td align="char" char="."><bold>.80</bold></td>
<td align="char" char=".">.78</td>
<td align="char" char=".">.78</td>
<td align="char" char=".">.76</td>
<td align="char" char=".">.01</td>
<td align="char" char=".">.01</td>
<td align="char" char="."><bold>.00</bold></td>
<td align="char" char="."><bold>.00</bold></td>
</tr>
<tr>
	<td/>
	<td/>	
<td>No</td>
<td align="char" char="."><bold>.81</bold></td>
<td align="char" char=".">.78</td>
<td align="char" char=".">.79</td>
<td align="char" char=".">.77</td>
<td align="char" char=".">.02</td>
<td align="char" char=".">.01</td>
<td align="char" char=".">.01</td>
<td align="char" char="."><bold>.00</bold></td>
</tr>
<tr>
	<td/>	
<td><inline-formula><mml:math id="m135"><mml:mrow><mml:msub><mml:mi>r</mml:mi><mml:mrow><mml:msubsup><mml:mi>a</mml:mi><mml:mi>j</mml:mi><mml:mo>+</mml:mo></mml:msubsup><mml:mo>−</mml:mo><mml:mi>S</mml:mi><mml:msubsup><mml:mi>D</mml:mi><mml:mi>j</mml:mi><mml:mo>+</mml:mo></mml:msubsup></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mn>.20</mml:mn></mml:mrow></mml:math></inline-formula> with 0% het</td>
<td>Yes</td>
<td align="char" char="."><bold>.76</bold></td>
<td align="char" char=".">.71</td>
<td align="char" char=".">.71</td>
<td align="char" char=".">.66</td>
<td align="char" char="."><bold>-.06</bold></td>
<td align="char" char=".">-.12</td>
<td align="char" char=".">-.14</td>
<td align="char" char=".">-.20</td>
</tr>
<tr>
	<td/>
	<td/>	
<td>No</td>
<td align="char" char="."><bold>.76</bold></td>
<td align="char" char=".">.72</td>
<td align="char" char=".">.71</td>
<td align="char" char=".">.66</td>
<td align="char" char="."><bold>-.05</bold></td>
<td align="char" char=".">-.11</td>
<td align="char" char=".">-.13</td>
<td align="char" char=".">-.21</td>
</tr>
<tr>
	<td/>	
<td><inline-formula><mml:math id="m136"><mml:mrow><mml:msub><mml:mi>r</mml:mi><mml:mrow><mml:msubsup><mml:mi>a</mml:mi><mml:mi>j</mml:mi><mml:mo>+</mml:mo></mml:msubsup><mml:mo>−</mml:mo><mml:mi>S</mml:mi><mml:msubsup><mml:mi>D</mml:mi><mml:mi>j</mml:mi><mml:mo>+</mml:mo></mml:msubsup></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mn>.50</mml:mn></mml:mrow></mml:math></inline-formula> with 0% het</td>
<td>Yes</td>
<td align="char" char="."><bold>.75</bold></td>
<td align="char" char=".">.71</td>
<td align="char" char=".">.69</td>
<td align="char" char=".">.64</td>
<td align="char" char="."><bold>-.07</bold></td>
<td align="char" char=".">-.12</td>
<td align="char" char=".">-.17</td>
<td align="char" char=".">-.24</td>
</tr>
<tr>
	<td/>
	<td/>	
<td>No</td>
<td align="char" char="."><bold>.76</bold></td>
<td align="char" char=".">.72</td>
<td align="char" char=".">.71</td>
<td align="char" char=".">.66</td>
<td align="char" char="."><bold>-.05</bold></td>
<td align="char" char=".">-.12</td>
<td align="char" char=".">-.13</td>
<td align="char" char=".">-.20</td>
</tr>
<tr>
	<td/>	
<td><inline-formula><mml:math id="m137"><mml:mrow><mml:msub><mml:mi>r</mml:mi><mml:mrow><mml:msubsup><mml:mi>a</mml:mi><mml:mi>j</mml:mi><mml:mo>+</mml:mo></mml:msubsup><mml:mo>−</mml:mo><mml:mi>S</mml:mi><mml:msubsup><mml:mi>D</mml:mi><mml:mi>j</mml:mi><mml:mo>+</mml:mo></mml:msubsup></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mn>.80</mml:mn></mml:mrow></mml:math></inline-formula> with 0% het</td>
<td>Yes</td>
<td align="char" char="."><bold>.71</bold></td>
<td align="char" char=".">.68</td>
<td align="char" char=".">.65</td>
<td align="char" char=".">.60</td>
<td align="char" char="."><bold>-.12</bold></td>
<td align="char" char=".">-.17</td>
<td align="char" char=".">-.25</td>
<td align="char" char=".">-.32</td>
</tr>
<tr>
	<td/>
	<td/>	
<td>No</td>
<td align="char" char="."><bold>.75</bold></td>
<td align="char" char=".">.71</td>
<td align="char" char=".">.70</td>
<td align="char" char=".">.66</td>
<td align="char" char="."><bold>-.06</bold></td>
<td align="char" char=".">-.12</td>
<td align="char" char=".">-.14</td>
<td align="char" char=".">-.21</td>
</tr>
<tr>
<td colspan="3">Grand mean</td>
<td align="char" char="."><bold>.80</bold></td>
<td align="char" char=".">.78</td>
<td align="char" char=".">.76</td>
<td align="char" char=".">.73</td>
<td align="char" char="."><bold>-.04</bold></td>
<td align="char" char=".">-.07</td>
<td align="char" char=".">-.10</td>
<td align="char" char=".">-.15</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<p><italic>Note</italic>. Maximum values of <inline-formula><mml:math id="m138"><mml:mrow><mml:msubsup><mml:mi>ρ</mml:mi><mml:mrow><mml:mover accent="true"><mml:mi>θ</mml:mi><mml:mo>^</mml:mo></mml:mover><mml:mi>θ</mml:mi></mml:mrow><mml:mn>2</mml:mn></mml:msubsup></mml:mrow></mml:math></inline-formula> and values of <inline-formula><mml:math id="m139"><mml:mrow><mml:msub><mml:mrow><mml:mtext>Bias</mml:mtext></mml:mrow><mml:mover accent="true"><mml:mi>Φ</mml:mi><mml:mo>^</mml:mo></mml:mover></mml:msub></mml:mrow></mml:math></inline-formula> closest to zero are marked in bold. All standard deviations of <inline-formula><mml:math id="m140"><mml:mrow><mml:msubsup><mml:mi>ρ</mml:mi><mml:mrow><mml:mover accent="true"><mml:mi>θ</mml:mi><mml:mo>^</mml:mo></mml:mover><mml:mi>θ</mml:mi></mml:mrow><mml:mn>2</mml:mn></mml:msubsup></mml:mrow></mml:math></inline-formula> and <inline-formula><mml:math id="m141"><mml:mrow><mml:msub><mml:mrow><mml:mtext>Bias</mml:mtext></mml:mrow><mml:mover accent="true"><mml:mi>Φ</mml:mi><mml:mo>^</mml:mo></mml:mover></mml:msub></mml:mrow></mml:math></inline-formula> range between 0 and .03. 
D-H = degree of relation between <inline-formula><mml:math id="m142"><mml:mrow><mml:msub><mml:mi>a</mml:mi><mml:mi>j</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula> and <inline-formula><mml:math id="m143"><mml:mrow><mml:mi>S</mml:mi><mml:msub><mml:mi>D</mml:mi><mml:mi>j</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula> of the positively keyed item bank and possibility of forming heteropolar blocks; SDM = social desirability matching; GA = genetic algorithm; <inline-formula><mml:math id="m144"><mml:mrow><mml:msub><mml:mrow><mml:mtext>SA</mml:mtext></mml:mrow><mml:mrow><mml:msub><mml:mtext>a</mml:mtext><mml:mtext>j</mml:mtext></mml:msub></mml:mrow></mml:msub></mml:mrow></mml:math></inline-formula>= simulated annealing with scale parameter, <inline-formula><mml:math id="m145"><mml:mrow><mml:msub><mml:mi>a</mml:mi><mml:mi>j</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula>, optimization; BF = brute-force; SA<sub>bp</sub> = simulated annealing blueprint.</p>
</table-wrap-foot>
</table-wrap><fig id="f2" position="anchor" fig-type="figure" orientation="portrait"><label>Figure 2</label><caption>
<title>Squared Correlation Between <inline-formula><mml:math id="m146"><mml:mover accent="true"><mml:mi>θ</mml:mi><mml:mo>^</mml:mo></mml:mover></mml:math></inline-formula> and <inline-formula><mml:math id="m147"><mml:mi>θ</mml:mi></mml:math></inline-formula></title><p><italic>Note</italic>. GA = genetic algorithm; <inline-formula><mml:math id="m148"><mml:mrow><mml:msub><mml:mrow><mml:mtext>SA</mml:mtext></mml:mrow><mml:mrow><mml:msub><mml:mtext>a</mml:mtext><mml:mtext>j</mml:mtext></mml:msub></mml:mrow></mml:msub></mml:mrow></mml:math></inline-formula>= simulated annealing with scale parameter, <inline-formula><mml:math id="m149"><mml:mrow><mml:msub><mml:mi>a</mml:mi><mml:mi>j</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula>, optimization; BF = brute-force; SA<sub>bp</sub> = simulated annealing blueprint; SDM = social desirability matching. Within every facet, the four algorithms are always displayed in the same left-to-right order.</p></caption><graphic xlink:href="meth.18925-f2" position="anchor" orientation="portrait"/></fig>
<p>As for <inline-formula><mml:math id="m150"><mml:mrow><mml:msub><mml:mrow><mml:mtext>RMSE</mml:mtext></mml:mrow><mml:mover accent="true"><mml:mi>θ</mml:mi><mml:mo>^</mml:mo></mml:mover></mml:msub><mml:mfenced><mml:mi>θ</mml:mi></mml:mfenced></mml:mrow></mml:math></inline-formula>, <xref ref-type="fig" rid="f3">Figure 3</xref> shows relevant differences across algorithms that align with the results found in <inline-formula><mml:math id="m151"><mml:mrow><mml:msubsup><mml:mi>ρ</mml:mi><mml:mrow><mml:mover accent="true"><mml:mi>θ</mml:mi><mml:mo>^</mml:mo></mml:mover><mml:mi>θ</mml:mi></mml:mrow><mml:mn>2</mml:mn></mml:msubsup></mml:mrow></mml:math></inline-formula>. Generally, GA has the lowest error (i.e., closest to zero), and SA blueprint has the highest. RMSE is lower for <inline-formula><mml:math id="m152"><mml:mi>θ</mml:mi></mml:math></inline-formula> values near zero and increases at the extremes. In addition, longer questionnaires reduce error, and greater differences among the algorithms are observed in the condition where <inline-formula><mml:math id="m153"><mml:mrow><mml:msub><mml:mi>r</mml:mi><mml:mrow><mml:msubsup><mml:mi>a</mml:mi><mml:mi>j</mml:mi><mml:mo>+</mml:mo></mml:msubsup><mml:mo>−</mml:mo><mml:mi>S</mml:mi><mml:msubsup><mml:mi>D</mml:mi><mml:mi>j</mml:mi><mml:mo>+</mml:mo></mml:msubsup></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mn>.80</mml:mn></mml:mrow></mml:math></inline-formula> in the 40-block questionnaire.</p><fig id="f3" position="float" fig-type="figure" orientation="portrait"><label>Figure 3</label><caption>
		<title>Average Conditional Root Mean Square Error</title><p><italic>Note</italic>. GA = genetic algorithm; <inline-formula><mml:math id="m172"><mml:mrow><mml:msub><mml:mrow><mml:mtext>SA</mml:mtext></mml:mrow><mml:mrow><mml:msub><mml:mtext>a</mml:mtext><mml:mtext>j</mml:mtext></mml:msub></mml:mrow></mml:msub></mml:mrow></mml:math></inline-formula>= simulated annealing with scale parameter, <inline-formula><mml:math id="m173"><mml:mrow><mml:msub><mml:mi>a</mml:mi><mml:mi>j</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula>, optimization; BF = brute-force; SA<sub>bp</sub> = simulated annealing blueprint; SDM = social desirability matching.</p></caption><graphic xlink:href="meth.18925-f3" position="float" orientation="portrait"/></fig></sec>
<sec><title>Ipsativity Indicator</title>
<p>Regarding the ipsativity indicator (<inline-formula><mml:math id="m154"><mml:mrow><mml:msub><mml:mrow><mml:mtext>Bias</mml:mtext></mml:mrow><mml:mover accent="true"><mml:mi>Φ</mml:mi><mml:mo>^</mml:mo></mml:mover></mml:msub></mml:mrow></mml:math></inline-formula>), several patterns emerge from the absolute values in <xref ref-type="table" rid="t2">Table 2</xref>. As seen in Table S3 in the Supplementary Material (see <xref ref-type="bibr" rid="r10">Escudero et al., 2026</xref>), the ANOVA results follow the same pattern as the <inline-formula><mml:math id="m155"><mml:mrow><mml:msubsup><mml:mi>ρ</mml:mi><mml:mrow><mml:mover accent="true"><mml:mi>θ</mml:mi><mml:mo>^</mml:mo></mml:mover><mml:mi>θ</mml:mi></mml:mrow><mml:mn>2</mml:mn></mml:msubsup></mml:mrow></mml:math></inline-formula> results previously described. First, mixed banks with 25% heteropolar blocks produce negligible bias across all algorithms (<inline-formula><mml:math id="m156"><mml:mrow><mml:msub><mml:mrow><mml:mtext>Bias</mml:mtext></mml:mrow><mml:mover accent="true"><mml:mi>Φ</mml:mi><mml:mo>^</mml:mo></mml:mover></mml:msub></mml:mrow></mml:math></inline-formula> ranges from .00 to .02). In contrast, bias increases as the correlation between the scale parameter, <inline-formula><mml:math id="m157"><mml:mrow><mml:msub><mml:mi>a</mml:mi><mml:mi>j</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula>, and <inline-formula><mml:math id="m158"><mml:mrow><mml:mi>S</mml:mi><mml:msub><mml:mi>D</mml:mi><mml:mi>j</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula> grows and when only positively keyed items are used. 
For the most demanding condition (i.e., <inline-formula><mml:math id="m159"><mml:mrow><mml:msub><mml:mi>r</mml:mi><mml:mrow><mml:msubsup><mml:mi>a</mml:mi><mml:mi>j</mml:mi><mml:mo>+</mml:mo></mml:msubsup><mml:mo>−</mml:mo><mml:mi>S</mml:mi><mml:msubsup><mml:mi>D</mml:mi><mml:mi>j</mml:mi><mml:mo>+</mml:mo></mml:msubsup></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mn>.80</mml:mn></mml:mrow></mml:math></inline-formula>, 0% heteropolar blocks, SDM, and Length 40), the bias reaches -.12 for GA, -.17 for SA with <inline-formula><mml:math id="m160"><mml:mrow><mml:msub><mml:mi>a</mml:mi><mml:mi>j</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula> optimization, -.25 for BF, and -.32 for SA blueprint. Across all conditions, GA consistently shows the lowest ipsativity levels, followed by SA with <inline-formula><mml:math id="m161"><mml:mrow><mml:msub><mml:mi>a</mml:mi><mml:mi>j</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula> optimization, BF, and SA blueprint. This ordering is also reflected in the grand means: -.04 (GA), -.07 (<inline-formula><mml:math id="m162"><mml:mrow><mml:msub><mml:mrow><mml:mtext>SA</mml:mtext></mml:mrow><mml:mrow><mml:msub><mml:mtext>a</mml:mtext><mml:mtext>j</mml:mtext></mml:msub></mml:mrow></mml:msub></mml:mrow></mml:math></inline-formula>), -.10 (BF), and -.15 (<inline-formula><mml:math id="m163"><mml:mrow><mml:msub><mml:mrow><mml:mtext>SA</mml:mtext></mml:mrow><mml:mrow><mml:mtext>bp</mml:mtext></mml:mrow></mml:msub></mml:mrow></mml:math></inline-formula>). These differences also have a large effect size (<inline-formula><mml:math id="m164"><mml:mrow><mml:msubsup><mml:mi>η</mml:mi><mml:mi>p</mml:mi><mml:mn>2</mml:mn></mml:msubsup><mml:mo>=</mml:mo><mml:mn>.82</mml:mn></mml:mrow></mml:math></inline-formula>). Thus, while all methods show increasing bias as the conditions become more demanding, GA systematically yields the least distorted correlation matrices.</p></sec></sec>
<sec sec-type="other1"><title>Empirical Illustration</title>
<p>We applied GA, SA (both the blueprint method and the scale parameter, <inline-formula><mml:math id="m165"><mml:mrow><mml:msub><mml:mi>a</mml:mi><mml:mi>j</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula>, optimization), and BF to an empirical dataset consisting of a mixed bank of 286 Likert-type items (145 positively keyed) from the IPIP-NEO (<xref ref-type="bibr" rid="r18">Johnson, 2014</xref>). From this dataset, we drew a random sample of 1,000 U.S. participants aged between 19 and 25 years old with no missing data. The rater’s SD data for each item were obtained from <xref ref-type="bibr" rid="r16">Hughes et al. (2021)</xref>. The correlation between the scale parameter, <inline-formula><mml:math id="m166"><mml:mrow><mml:msub><mml:mi>a</mml:mi><mml:mi>j</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula>, and <inline-formula><mml:math id="m167"><mml:mrow><mml:mi>S</mml:mi><mml:msub><mml:mi>D</mml:mi><mml:mi>j</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula> in this bank is .30 (with <inline-formula><mml:math id="m168"><mml:mrow><mml:msub><mml:mi>r</mml:mi><mml:mrow><mml:msubsup><mml:mi>a</mml:mi><mml:mi>j</mml:mi><mml:mo>+</mml:mo></mml:msubsup><mml:mo>−</mml:mo><mml:mi>S</mml:mi><mml:msubsup><mml:mi>D</mml:mi><mml:mi>j</mml:mi><mml:mo>+</mml:mo></mml:msubsup></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mn>.01</mml:mn></mml:mrow></mml:math></inline-formula> and <inline-formula><mml:math id="m169"><mml:mrow><mml:msub><mml:mi>r</mml:mi><mml:mrow><mml:msubsup><mml:mi>a</mml:mi><mml:mi>j</mml:mi><mml:mo>−</mml:mo></mml:msubsup><mml:mo>−</mml:mo><mml:mi>S</mml:mi><mml:msubsup><mml:mi>D</mml:mi><mml:mi>j</mml:mi><mml:mo>−</mml:mo></mml:msubsup></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mn>.21</mml:mn></mml:mrow></mml:math></inline-formula>). 
We assembled four questionnaires for each optimization method, two with a total of 70 blocks, where one had all homopolar blocks and one with 20% heteropolar blocks (14 blocks), and two with 35 blocks, where one had all homopolar blocks and one with 20% heteropolar blocks (7 blocks). All questionnaires incorporated SDM using a 0.75 cutoff for homopolar blocks and 1.125 for heteropolar blocks, since SD ratings ranged from 1 to 7. Likert item parameters were obtained with a graded response model. For the estimation of marginal reliability used in the BF and GA methods, we used the empirical NEO-PI-R factor correlation matrix (<xref ref-type="bibr" rid="r9">Costa &amp; McCrae, 1992</xref>). For algorithm comparison, we considered empirical reliability estimates. The procedure was as follows. First, 5,000 responses to the FCQ were simulated using the <inline-formula><mml:math id="m170"><mml:mrow><mml:msub><mml:mi>a</mml:mi><mml:mi>j</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula> and <inline-formula><mml:math id="m171"><mml:mrow><mml:msub><mml:mi>d</mml:mi><mml:mi>j</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula> parameters derived from estimates based on the Likert-format version, along with the estimated trait correlation matrix. Next, the MUPP-2PL model was fitted to the simulated data. Finally, empirical reliability was calculated using the empirical_rxx() function from the R package <italic>mirt</italic>.</p>
<p>As seen in <xref ref-type="table" rid="t3">Table 3</xref>, GA consistently provides the most reliable questionnaire scores (close to or above <inline-formula><mml:math id="m174"><mml:mrow><mml:msub><mml:mover accent="true"><mml:mi>ρ</mml:mi><mml:mo>^</mml:mo></mml:mover><mml:mrow><mml:mi>e</mml:mi><mml:mi>m</mml:mi><mml:mi>p</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mn>.70</mml:mn></mml:mrow></mml:math></inline-formula>), followed by SA with <inline-formula><mml:math id="m175"><mml:mrow><mml:msub><mml:mi>a</mml:mi><mml:mi>j</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula> optimization, which sometimes performs the same as BF, and SA blueprint. Differences were smaller when assembling longer questionnaires.</p>
<table-wrap id="t3" position="anchor" orientation="portrait">
<label>Table 3</label><caption><title>Average Trait Recovery Using Each Algorithm in the Empirical Illustration</title></caption>
<table frame="hsides" rules="groups">
<col width="" align="left"/>
<col width=""/>
<col width=""/>
<col width=""/>
<col width=""/>
<thead>
<tr>
<th/>
<th colspan="4" scope="colgroup">	<inline-formula><mml:math id="m176"><mml:mrow><mml:msub><mml:mover accent="true"><mml:mi>ρ</mml:mi><mml:mo>^</mml:mo></mml:mover><mml:mrow><mml:mi>e</mml:mi><mml:mi>m</mml:mi><mml:mi>p</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:math></inline-formula><hr/></th>
</tr>
<tr>
<th valign="bottom">Condition</th>	
<th>GA</th>
<th>	<inline-formula><mml:math id="m177"><mml:mrow><mml:msub><mml:mrow><mml:mtext fontweight="bold">SA</mml:mtext></mml:mrow><mml:mrow><mml:msub><mml:mtext>a</mml:mtext><mml:mtext>j</mml:mtext></mml:msub></mml:mrow></mml:msub></mml:mrow></mml:math></inline-formula></th>
<th>BF</th>
<th>SA<sub>bp</sub></th>
</tr>
</thead>
<tbody>
	<tr style="background-lightblue; white-border-top; white-border-bottom">
<td>70-blocks</td>
<td/>
<td/>
<td/>
<td/>
</tr>
<tr>
<td>20% heteropolar</td>
<td align="char" char="."><bold>.78</bold></td>
<td align="char" char=".">.77</td>
<td align="char" char=".">.76</td>
<td align="char" char=".">.74</td>
</tr>
<tr>
<td>0% heteropolar</td>
<td align="char" char="."><bold>.79</bold></td>
<td align="char" char=".">.76</td>
<td align="char" char=".">.75</td>
<td align="char" char=".">.74</td>
</tr>
	<tr style="background-lightblue; white-border-top; white-border-bottom">
<td>35-blocks</td>
<td/>
<td/>
<td/>
<td/>
</tr>
<tr>
<td>20% heteropolar</td>
<td align="char" char="."><bold>.71</bold></td>
<td align="char" char=".">.65</td>
<td align="char" char=".">.64</td>
<td align="char" char=".">.61</td>
</tr>
<tr>
<td>0% heteropolar</td>
<td align="char" char="."><bold>.70</bold></td>
<td align="char" char=".">.65</td>
<td align="char" char=".">.63</td>
<td align="char" char=".">.59</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<p><italic>Note</italic>. Maximum values of <inline-formula><mml:math id="m178"><mml:mrow><mml:msub><mml:mover accent="true"><mml:mi>ρ</mml:mi><mml:mo>^</mml:mo></mml:mover><mml:mrow><mml:mi>e</mml:mi><mml:mi>m</mml:mi><mml:mi>p</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:math></inline-formula> are marked in bold. GA = genetic algorithm; <inline-formula><mml:math id="m179"><mml:mrow><mml:msub><mml:mrow><mml:mtext>SA</mml:mtext></mml:mrow><mml:mrow><mml:msub><mml:mtext>a</mml:mtext><mml:mtext>j</mml:mtext></mml:msub></mml:mrow></mml:msub></mml:mrow></mml:math></inline-formula>= simulated annealing with scale parameter, <inline-formula><mml:math id="m180"><mml:mrow><mml:msub><mml:mi>a</mml:mi><mml:mi>j</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula>, optimization; BF = brute-force; SA<sub>bp</sub> = simulated annealing blueprint.</p>
</table-wrap-foot>
</table-wrap></sec>
<sec sec-type="discussion"><title>Discussion</title>
<p>The optimal assembly of non-cognitive questionnaires with adequate reliability has become increasingly relevant with the growing use of FC formats. Despite its importance, no previous study has systematically compared the performance of existing methods for the optimal assembly of FCQs. Therefore, the present study addresses this gap by evaluating the impact of four assembly methods on trait score recovery through a simulation study and an empirical illustration: GA (<xref ref-type="bibr" rid="r20">Kreitchmann et al., 2022</xref>), with an improvement on the application of SD constraints; two approaches of SA implemented in the <italic>autoFC</italic> package (<xref ref-type="bibr" rid="r24">Li et al., 2022</xref>), one using a blueprint method and the other optimizing the within-block difference in the <inline-formula><mml:math id="m181"><mml:mrow><mml:msub><mml:mi>a</mml:mi><mml:mi>j</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula> parameters; and a BF random search. Two key aspects identified in the literature as critical for assembly are the use of heteropolar blocks (often recommended to avoid ipsativity issues) and the correlation between <inline-formula><mml:math id="m182"><mml:mrow><mml:msub><mml:mi>a</mml:mi><mml:mi>j</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula> and <inline-formula><mml:math id="m183"><mml:mrow><mml:mi>S</mml:mi><mml:msub><mml:mi>D</mml:mi><mml:mi>j</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula>, which can make optimal pairing difficult (i.e., pairing items with different <inline-formula><mml:math id="m184"><mml:mrow><mml:msub><mml:mi>a</mml:mi><mml:mi>j</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula>, while keeping them matched in <inline-formula><mml:math id="m185"><mml:mrow><mml:mi>S</mml:mi><mml:msub><mml:mi>D</mml:mi><mml:mi>j</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula>; <xref ref-type="bibr" rid="r22">Lee et al., 2022</xref>; <xref ref-type="bibr" rid="r25">Li et al., 
2025</xref>; <xref ref-type="bibr" rid="r31">Pavlov et al., 2021</xref>). These factors, along with the questionnaire length, were incorporated into the simulation design and empirical illustration.</p>
<p>As we hypothesized, among the methods tested, GA (<xref ref-type="bibr" rid="r20">Kreitchmann et al., 2022</xref>) systematically produced the most reliable questionnaire scores in both the simulation study and the empirical illustration. This result can be expected, as this method explicitly optimizes the reliability of the assembled blocks. However, closely behind is the SA with <inline-formula><mml:math id="m186"><mml:mrow><mml:msub><mml:mi>a</mml:mi><mml:mi>j</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula> parameter optimization, which aligns with the promising results found in <xref ref-type="bibr" rid="r25">Li et al. (2025)</xref>. In terms of implementation, GA is more computationally demanding and slower, whereas SA is faster. The SA blueprint method performed similarly to or worse than the BF approach in the simulation study. This result is reasonable, as BF selects the questionnaire with the highest expected score reliability from the 100 generated, whereas the SA blueprint method does not include an explicit reliability optimization step. All examined factors significantly affected reliability, including questionnaire length, SDM, the degree of relation between <inline-formula><mml:math id="m187"><mml:mrow><mml:msub><mml:mi>a</mml:mi><mml:mi>j</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula> and <inline-formula><mml:math id="m188"><mml:mrow><mml:mi>S</mml:mi><mml:msub><mml:mi>D</mml:mi><mml:mi>j</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula>, and the inclusion of heteropolar blocks. As expected, longer questionnaires yielded higher reliability, and the impact of the assembly algorithm became more pronounced with increased length. GA showed the greatest advantage under the most demanding conditions. The results for the ipsativity indicator, bias in the recovery of the correlation matrix, preserve the same ordering of methods as observed for reliability. 
Ipsativity bias tends to increase when heteropolar blocks are absent, but its magnitude depends on the scenario. In many conditions, including some with 0% heteropolar blocks, bias remains minimal across algorithms, and only under more demanding scenarios such as high correlations between scale parameters and social desirability, shorter tests, or less optimal assembly algorithms does it reach larger values. Across all conditions, GA consistently shows the lowest bias, followed by SA with <inline-formula><mml:math id="m189"><mml:mrow><mml:msub><mml:mi>a</mml:mi><mml:mi>j</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula> optimization, BF, and SA blueprint, highlighting that both test design and algorithm choice can influence the magnitude of distortion. This comparison of the algorithms in terms of this variable is informative, since none of them explicitly aims to minimize this bias, allowing for a fairer comparison.</p>
<p>In the simulation study, mixed-keyed tests produced higher reliability than positively keyed tests alone. This further emphasizes that the incorporation of heteropolar blocks can be useful if paired correctly, supporting the findings of <xref ref-type="bibr" rid="r3">Brown and Maydeu-Olivares (2011)</xref> and <xref ref-type="bibr" rid="r11">Frick et al. (2023)</xref>, but contrasting with those reported by <xref ref-type="bibr" rid="r13">Graña et al. (2025)</xref>. This did not align with our second hypothesis. It should be noted that the criterion was more lenient when combining items with greater variability in SD in the case of heteropolar blocks, which may have contributed to the observed increase in reliability. Moreover, excluding heteropolar blocks reduces the search space. In the empirical example, this exclusion results in disregarding 12,392 item combinations. Reducing the search space also limits the flexibility to maximize reliability. If the applied researcher considers incorporating heteropolar blocks to be beneficial, they can do so, as it can increase reliability, which can be achieved with only a few blocks. However, this can have a downside, since they are harder to form, especially when imposing other constraints such as SDM, as seen in the empirical illustration, where a smaller bank results in fewer viable heteropolar blocks. Such restrictions are particularly problematic in high-stakes contexts where item leakage may further reduce usable item combinations.</p>
<p>In line with our third hypothesis, the relationship between item scale parameters and SD also proved to be relevant. In banks consisting only of positively keyed items, lower correlations between <inline-formula><mml:math id="m190"><mml:mrow><mml:msub><mml:mi>a</mml:mi><mml:mi>j</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula> and <inline-formula><mml:math id="m191"><mml:mrow><mml:mi>S</mml:mi><mml:msub><mml:mi>D</mml:mi><mml:mi>j</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula> led to higher reliability. In contrast, high correlations (e.g., <inline-formula><mml:math id="m192"><mml:mrow><mml:msub><mml:mi>r</mml:mi><mml:mrow><mml:msubsup><mml:mi>a</mml:mi><mml:mi>j</mml:mi><mml:mo>+</mml:mo></mml:msubsup><mml:mo>−</mml:mo><mml:mi>S</mml:mi><mml:msubsup><mml:mi>D</mml:mi><mml:mi>j</mml:mi><mml:mo>+</mml:mo></mml:msubsup></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mn>.80</mml:mn></mml:mrow></mml:math></inline-formula>) reduced reliability; this is more noticeable when matching for SD, likely because highly discriminating items were also highly socially desirable, making it harder to assemble blocks with high <inline-formula><mml:math id="m193"><mml:mrow><mml:msub><mml:mi>a</mml:mi><mml:mi>j</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula> and minimal <inline-formula><mml:math id="m194"><mml:mrow><mml:mi>S</mml:mi><mml:msub><mml:mi>D</mml:mi><mml:mi>j</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula> differences. These results indicate that the more difficult the SDM task is, the more the choice of method matters, with GA showing the best performance.</p>
<p>In line with our fourth hypothesis, SDM tends to reduce reliability: as an additional constraint, it lowers reliability regardless of the algorithm. This effect is most pronounced in BF, whereas in GA and SA the loss in reliability is smaller. This decrease is typically less than .01, except for the most demanding condition (<inline-formula><mml:math id="m195"><mml:mrow><mml:msub><mml:mi>r</mml:mi><mml:mrow><mml:msubsup><mml:mi>a</mml:mi><mml:mi>j</mml:mi><mml:mo>+</mml:mo></mml:msubsup><mml:mo>−</mml:mo><mml:mi>S</mml:mi><mml:msubsup><mml:mi>D</mml:mi><mml:mi>j</mml:mi><mml:mo>+</mml:mo></mml:msubsup></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mn>.80</mml:mn></mml:mrow></mml:math></inline-formula>). Nevertheless, the difference remains small, and SDM remains important in practice, as it can be easily implemented and can enhance validity.</p>
	<p>While the findings are promising, certain limitations must be considered and may inform future research efforts. A limitation of this study is its focus on pairwise blocks, leaving the evaluation of these assembly methods for larger block sizes to future research. This constraint also applies to GA, which currently does not support the formation of blocks containing more than two items. In this study, we rely on the assumption of measurement invariance, with supporting evidence provided by <xref ref-type="bibr" rid="r29">Morillo et al. (2019)</xref>. Nevertheless, prior research acknowledges that this assumption may not hold universally (<xref ref-type="bibr" rid="r20">Kreitchmann et al., 2022</xref>) and may be influenced by block polarity (<xref ref-type="bibr" rid="r13">Graña et al., 2025</xref>). Accordingly, the reliance on measurement invariance represents a limitation of the current study. As any method that assembles blocks from SS item parameters inherently relies on this assumption, and because measurement invariance remains a critical issue, further empirical work is needed to delineate the cases where it may not be sustained. It may be valuable to explore alternative approaches such as psychometric networks to examine it in more depth (<xref ref-type="bibr" rid="r1">Abdelhamid et al., 2024</xref>; <xref ref-type="bibr" rid="r17">Jamison et al., 2024</xref>). Applied researchers interested in using the algorithms used in this study should consider this when assembling FCQs. Here, we focus on the MUPP-2PL model, although the proposed procedures can in principle be extended to the TIRT framework. The choice of model may have some impact, because even though MUPP-2PL and TIRT are nearly equivalent for binary FC data, they posit different response processes and may therefore yield slightly different block-level parameter estimates. 
Nevertheless, as our interest lies in the comparative performance of the block-assembly algorithms rather than in the absolute values of the parameters, the overall pattern of results is not expected to change, and the main conclusions should generalize to TIRT-based applications. Another potential direction for future research is to extend the GA approach by incorporating alternative reliability metrics beyond marginal reliability as the objective function. For instance, GA could be adapted to prioritize the reliability of a specific trait or apply weighted importance to certain traits over others or ensure a minimum level of reliability for all traits, rather than simply maximizing the average reliability across all traits. Additionally, an interesting direction for future research would be the incorporation of reliability in the optimization of energy in the <italic>autoFC</italic> package. In this regard, a normative-order indicator was used here as a measure of performance (reliability and similarity between estimated and true theta), although other classification-related metrics may be of interest in applied settings. These other metrics can be investigated in future research. In line with this further exploration and with the goal of supporting practical use, the R functions used in this study have been made available in an OSF repository, at <xref ref-type="bibr" rid="r34">Sorrel et al. (2026)</xref>. This will facilitate their application, especially for potential users of GA, who previously had to rely on the Shiny app. It would be interesting for future research to examine whether other possible options for block assembly, such as linear programming or ant colony algorithms, might offer better performance under certain conditions.</p>
<sec sec-type="conclusions"><title>Conclusion</title>
<p>In conclusion, we recommend using the GA for assembling FCQs, as it consistently produces high-quality solutions by accounting for key psychometric properties. Its advantages are particularly evident in challenging scenarios such as short questionnaires, high correlation between <inline-formula><mml:math id="m196"><mml:mrow><mml:msub><mml:mi>a</mml:mi><mml:mi>j</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula> and <inline-formula><mml:math id="m197"><mml:mrow><mml:mi>S</mml:mi><mml:msub><mml:mi>D</mml:mi><mml:mi>j</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula>, and the need to match items on SD. Although SDM may slightly reduce reliability due to its restrictive nature, it remains essential for minimizing response bias. As the forced-choice format continues to gain popularity over traditional SS formats, the use of optimization algorithms becomes increasingly important. These methods enable fast and reliable questionnaire assembly while accommodating additional constraints such as SD control and the inclusion of heteropolar blocks, making them an essential tool for advancing non-cognitive assessment.</p></sec></sec>
</body>
<back>
	<fn-group content-type="author-contribution">
		<fn fn-type="con">
			<p><italic>Scarlett Sophie Escudero</italic>: Conceptualization, Data curation, Formal analysis, Investigation, Methodology, Software, Visualization, Writing – original draft, Writing – review &amp; editing. <italic>Miguel A. Sorrel</italic>: Conceptualization, Funding acquisition, Investigation, Methodology, Project administration, Resources, Supervision, Validation, Writing – original draft, Writing – review &amp; editing. <italic>Rodrigo S. Kreitchmann</italic>: Conceptualization, Methodology, Software, Validation, Writing – review &amp; editing. <italic>Francisco José Abad</italic>: Conceptualization, Funding acquisition, Methodology, Resources, Validation, Writing – review &amp; editing.</p>
		</fn>
	</fn-group>
	
	
	
	<fn-group><fn fn-type="financial-disclosure">
<p content-type="fn-title">This work was funded by MICIU/AEI/10.13039/501100011033 and ERDF/EU under the project “Computerized adaptive tests based on new assessment formats” (Reference: PID2022-137258NB-I00) and the UAM-IIC Chair Psychometric Models and Applications.</p></fn><fn fn-type="conflict">
<p content-type="fn-title">The authors declare that there are no conflicts of interest to disclose.</p></fn></fn-group>
<ref-list><title>References</title>
<ref id="r1"><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Abdelhamid</surname>, <given-names>G. S. M.</given-names></string-name>, <string-name name-style="western"><surname>Hidalgo</surname>, <given-names>M. D.</given-names></string-name>, <string-name name-style="western"><surname>French</surname>, <given-names>B. F.</given-names></string-name>, &amp; <string-name name-style="western"><surname>Gómez-Benito</surname>, <given-names>J.</given-names></string-name></person-group> (<year>2024</year>). <article-title>Partitioning dichotomous items using Mokken scale analysis, exploratory graph analysis and parallel analysis: A Monte Carlo simulation.</article-title> <source>Methodology: European Journal of Research Methods for the Behavioral and Social Sciences</source>, <volume>20</volume>(<issue>3</issue>), <fpage>187</fpage>–<lpage>217</lpage>. <pub-id pub-id-type="doi">10.5964/meth.12503</pub-id></mixed-citation></ref>
<ref id="r2"><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Brown</surname>, <given-names>A.</given-names></string-name></person-group> (<year>2016</year>). <article-title>Item response models for forced-choice questionnaires: A common framework.</article-title> <source>Psychometrika</source>, <volume>81</volume>(<issue>1</issue>), <fpage>135</fpage>–<lpage>160</lpage>. <pub-id pub-id-type="doi">10.1007/s11336-014-9434-9</pub-id><pub-id pub-id-type="pmid">25663304</pub-id></mixed-citation></ref>
<ref id="r3"><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Brown</surname>, <given-names>A.</given-names></string-name>, &amp; <string-name name-style="western"><surname>Maydeu-Olivares</surname>, <given-names>A.</given-names></string-name></person-group> (<year>2011</year>). <article-title>Item response modeling of forced-choice questionnaires.</article-title> <source>Educational and Psychological Measurement</source>, <volume>71</volume>(<issue>3</issue>), <fpage>460</fpage>–<lpage>502</lpage>. <pub-id pub-id-type="doi">10.1177/0013164410375112</pub-id></mixed-citation></ref>
<ref id="r4"><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Bürkner</surname>, <given-names>P.-C.</given-names></string-name></person-group> (<year>2022</year>). <article-title>On the information obtainable from comparative judgments.</article-title> <source>Psychometrika</source>, <volume>87</volume>(<issue>4</issue>), <fpage>1439</fpage>–<lpage>1472</lpage>. <pub-id pub-id-type="doi">10.1007/s11336-022-09843-z</pub-id><pub-id pub-id-type="pmid">35133553</pub-id></mixed-citation></ref>
<ref id="r5"><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Cao</surname>, <given-names>M.</given-names></string-name>, &amp; <string-name name-style="western"><surname>Drasgow</surname>, <given-names>F.</given-names></string-name></person-group> (<year>2019</year>). <article-title>Does forcing reduce faking? A meta-analytic review of forced-choice personality measures in high-stakes situations.</article-title> <source>Journal of Applied Psychology</source>, <volume>104</volume>(<issue>11</issue>), <fpage>1347</fpage>–<lpage>1368</lpage>. <pub-id pub-id-type="doi">10.1037/apl0000414</pub-id><pub-id pub-id-type="pmid">31070382</pub-id></mixed-citation></ref>
<ref id="r6"><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Chalmers</surname>, <given-names>R. P.</given-names></string-name></person-group> (<year>2012</year>). <article-title>mirt: A multidimensional item response theory package for the R environment.</article-title> <source>Journal of Statistical Software</source>, <volume>48</volume>(<issue>6</issue>), <fpage>1</fpage>–<lpage>29</lpage>. <pub-id pub-id-type="doi">10.18637/jss.v048.i06</pub-id></mixed-citation></ref>
<ref id="r7"><mixed-citation publication-type="book">Cohen, J. (1988). <italic>Statistical power analysis for the behavioral sciences</italic> (2<sup>nd</sup> ed.). Routledge. <pub-id pub-id-type="doi">10.4324/9780203771587</pub-id></mixed-citation></ref>
<ref id="r8"><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Corey</surname>, <given-names>D. M.</given-names></string-name>, <string-name name-style="western"><surname>Dunlap</surname>, <given-names>W. P.</given-names></string-name>, &amp; <string-name name-style="western"><surname>Burke</surname>, <given-names>M. J.</given-names></string-name></person-group> (<year>1998</year>). <article-title>Averaging correlations: Expected values and bias in combined Pearson rs and Fisher’s z transformations.</article-title> <source>Journal of General Psychology</source>, <volume>125</volume>(<issue>3</issue>), <fpage>245</fpage>–<lpage>261</lpage>. <pub-id pub-id-type="doi">10.1080/00221309809595548</pub-id></mixed-citation></ref>
<ref id="r9"><mixed-citation publication-type="book">Costa, P. T., Jr., &amp; McCrae, R. R. (1992). <italic>NEO-PI-R Professional Manual</italic>. Psychological Assessment Resources.</mixed-citation></ref>
	<ref id="r10"><mixed-citation publication-type="data"><person-group person-group-type="author"><string-name name-style="western"><surname>Escudero</surname>, <given-names>S.</given-names></string-name>, <string-name name-style="western"><surname>Sorrel</surname>, <given-names>M. A.</given-names></string-name>, <string-name name-style="western"><surname>Kreitchmann</surname>, <given-names>R. S.</given-names></string-name>, &amp; <string-name name-style="western"><surname>Abad</surname>, <given-names>F. J.</given-names></string-name></person-group> (<year>2026</year>). <data-title><italic>Supplementary Materials to</italic> “A comparison of optimization algorithms for forced-choice questionnaire assembly”</data-title> <comment>[Supplementary tables and figures to this study]</comment>. PsychOpen GOLD. <pub-id pub-id-type="doi">10.23668/psycharchives.22226</pub-id></mixed-citation></ref>
<ref id="r11"><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Frick</surname>, <given-names>S.</given-names></string-name>, <string-name name-style="western"><surname>Brown</surname>, <given-names>A.</given-names></string-name>, &amp; <string-name name-style="western"><surname>Wetzel</surname>, <given-names>E.</given-names></string-name></person-group> (<year>2023</year>). <article-title>Investigating the normativity of trait estimates from multidimensional forced-choice data.</article-title> <source>Multivariate Behavioral Research</source>, <volume>58</volume>(<issue>1</issue>), <fpage>1</fpage>–<lpage>29</lpage>. <pub-id pub-id-type="doi">10.1080/00273171.2021.1938960</pub-id><pub-id pub-id-type="pmid">34464217</pub-id></mixed-citation></ref>
<ref id="r12"><mixed-citation publication-type="book">Goldberg, L. R. (1999). A broad-bandwidth, public domain, personality inventory measuring the lower-level facets of several five-factor models. In I. Mervielde, I. Deary, F. De Fruyt &amp; F. Ostendorf (Eds.), <italic>Personality psychology in Europe</italic> (Vol. 7, pp. 7–28). Tilburg University Press.</mixed-citation></ref>
<ref id="r13"><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Graña</surname>, <given-names>D. F.</given-names></string-name>, <string-name name-style="western"><surname>Kreitchmann</surname>, <given-names>R. S.</given-names></string-name>, <string-name name-style="western"><surname>Abad</surname>, <given-names>F. J.</given-names></string-name>, &amp; <string-name name-style="western"><surname>Sorrel</surname>, <given-names>M. A.</given-names></string-name></person-group> (<year>2025</year>). <article-title>Equally vs. unequally keyed blocks in forced-choice questionnaires: Implications on validity and reliability.</article-title> <source>Journal of Personality Assessment</source>, <volume>107</volume>(<issue>3</issue>), <fpage>392</fpage>–<lpage>405</lpage>. <pub-id pub-id-type="doi">10.1080/00223891.2024.2420869</pub-id><pub-id pub-id-type="pmid">39526652</pub-id></mixed-citation></ref>
<ref id="r14"><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Hicks</surname>, <given-names>L. E.</given-names></string-name></person-group> (<year>1970</year>). <article-title>Some properties of ipsative, normative, and forced-choice normative measures.</article-title> <source>Psychological Bulletin</source>, <volume>74</volume>(<issue>3</issue>), <fpage>167</fpage>–<lpage>184</lpage>. <pub-id pub-id-type="doi">10.1037/h0029780</pub-id></mixed-citation></ref>
<ref id="r15"><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Hontangas</surname>, <given-names>P. M.</given-names></string-name>, <string-name name-style="western"><surname>de la Torre</surname>, <given-names>J.</given-names></string-name>, <string-name name-style="western"><surname>Ponsoda</surname>, <given-names>V.</given-names></string-name>, <string-name name-style="western"><surname>Leenen</surname>, <given-names>I.</given-names></string-name>, <string-name name-style="western"><surname>Morillo</surname>, <given-names>D.</given-names></string-name>, &amp; <string-name name-style="western"><surname>Abad</surname>, <given-names>F. J.</given-names></string-name></person-group> (<year>2015</year>). <article-title>Comparing traditional and IRT scoring of forced-choice tests.</article-title> <source>Applied Psychological Measurement</source>, <volume>39</volume>(<issue>8</issue>), <fpage>598</fpage>–<lpage>612</lpage>. <pub-id pub-id-type="doi">10.1177/0146621615585851</pub-id><pub-id pub-id-type="pmid">29881030</pub-id></mixed-citation></ref>
<ref id="r16"><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Hughes</surname>, <given-names>A. W.</given-names></string-name>, <string-name name-style="western"><surname>Dunlop</surname>, <given-names>P. D.</given-names></string-name>, <string-name name-style="western"><surname>Holtrop</surname>, <given-names>D.</given-names></string-name>, &amp; <string-name name-style="western"><surname>Wee</surname>, <given-names>S.</given-names></string-name></person-group> (<year>2021</year>). <article-title>Spotting the “ideal” personality response: Effects of item matching in forced choice measures for personnel selection.</article-title> <source>Journal of Personnel Psychology</source>, <volume>20</volume>(<issue>1</issue>), <fpage>17</fpage>–<lpage>26</lpage>. <pub-id pub-id-type="doi">10.1027/1866-5888/a000267</pub-id></mixed-citation></ref>
<ref id="r17"><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Jamison</surname>, <given-names>L.</given-names></string-name>, <string-name name-style="western"><surname>Christensen</surname>, <given-names>A. P.</given-names></string-name>, &amp; <string-name name-style="western"><surname>Golino</surname>, <given-names>H. F.</given-names></string-name></person-group> (<year>2024</year>). <article-title>Metric invariance in exploratory graph analysis via permutation testing.</article-title> <source>Methodology</source>, <volume>20</volume>(<issue>2</issue>), <fpage>144</fpage>–<lpage>186</lpage>. <pub-id pub-id-type="doi">10.5964/meth.12877</pub-id></mixed-citation></ref>
<ref id="r18"><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Johnson</surname>, <given-names>J. A.</given-names></string-name></person-group> (<year>2014</year>). <article-title>Measuring thirty facets of the Five Factor Model with a 120-item public domain inventory: Development of the IPIP-NEO-120.</article-title> <source>Journal of Research in Personality</source>, <volume>51</volume>, <fpage>78</fpage>–<lpage>89</lpage>. <pub-id pub-id-type="doi">10.1016/j.jrp.2014.05.003</pub-id></mixed-citation></ref>
<ref id="r19"><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Kirkpatrick</surname>, <given-names>S.</given-names></string-name>, <string-name name-style="western"><surname>Gelatt</surname>, <given-names>C. D.</given-names>, <suffix>Jr</suffix></string-name>., &amp; <string-name name-style="western"><surname>Vecchi</surname>, <given-names>M. P.</given-names></string-name></person-group> (<year>1983</year>). <article-title>Optimization by simulated annealing.</article-title> <source>Science</source>, <volume>220</volume>(<issue>4598</issue>), <fpage>671</fpage>–<lpage>680</lpage>. <pub-id pub-id-type="doi">10.1126/science.220.4598.671</pub-id><pub-id pub-id-type="pmid">17813860</pub-id></mixed-citation></ref>
<ref id="r20"><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Kreitchmann</surname>, <given-names>R. S.</given-names></string-name>, <string-name name-style="western"><surname>Abad</surname>, <given-names>F. J.</given-names></string-name>, &amp; <string-name name-style="western"><surname>Sorrel</surname>, <given-names>M. A.</given-names></string-name></person-group> (<year>2022</year>). <article-title>A genetic algorithm for optimal assembly of pairwise forced-choice questionnaires.</article-title> <source>Behavior Research Methods</source>, <volume>54</volume>, <fpage>1476</fpage>–<lpage>1492</lpage>. <pub-id pub-id-type="doi">10.3758/s13428-021-01677-4</pub-id><pub-id pub-id-type="pmid">34505277</pub-id></mixed-citation></ref>
<ref id="r21"><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Kreitchmann</surname>, <given-names>R. S.</given-names></string-name>, <string-name name-style="western"><surname>Abad</surname>, <given-names>F. J.</given-names></string-name>, <string-name name-style="western"><surname>Ponsoda</surname>, <given-names>V.</given-names></string-name>, <string-name name-style="western"><surname>Nieto</surname>, <given-names>M. D.</given-names></string-name>, &amp; <string-name name-style="western"><surname>Morillo</surname>, <given-names>D.</given-names></string-name></person-group> (<year>2019</year>). <article-title>Controlling for response biases in self-report scales: Forced-choice vs. psychometric modeling of Likert items.</article-title> <source>Frontiers in Psychology</source>, <volume>10</volume>, <elocation-id>2309</elocation-id>. <pub-id pub-id-type="doi">10.3389/fpsyg.2019.02309</pub-id><pub-id pub-id-type="pmid">31681103</pub-id></mixed-citation></ref>
<ref id="r22"><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Lee</surname>, <given-names>P.</given-names></string-name>, <string-name name-style="western"><surname>Joo</surname>, <given-names>S. H.</given-names></string-name>, <string-name name-style="western"><surname>Zhou</surname>, <given-names>S.</given-names></string-name>, &amp; <string-name name-style="western"><surname>Son</surname>, <given-names>M.</given-names></string-name></person-group> (<year>2022</year>). <article-title>Investigating the impact of negatively keyed statements on multidimensional forced-choice personality measures: A comparison of partially ipsative and IRT scoring methods.</article-title> <source>Personality and Individual Differences</source>, <volume>191</volume>, <elocation-id>111555</elocation-id>. <pub-id pub-id-type="doi">10.1016/j.paid.2022.111555</pub-id></mixed-citation></ref>
<ref id="r23"><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Lee</surname>, <given-names>P.</given-names></string-name>, <string-name name-style="western"><surname>Son</surname>, <given-names>M.</given-names></string-name>, <string-name name-style="western"><surname>Zhou</surname>, <given-names>S.</given-names></string-name>, <string-name name-style="western"><surname>Joo</surname>, <given-names>S.</given-names></string-name>, <string-name name-style="western"><surname>Jia</surname>, <given-names>Z.</given-names></string-name>, &amp; <string-name name-style="western"><surname>Cheng</surname>, <given-names>V.</given-names></string-name></person-group> (<year>2025</year>). <article-title>The journey of forced choice measurement over 80 years: Past, present, and future.</article-title> <source>Organizational Research Methods</source>, <volume>28</volume>(<issue>4</issue>), <fpage>680</fpage>–<lpage>722</lpage>. <pub-id pub-id-type="doi">10.1177/10944281251350687</pub-id></mixed-citation></ref>
<ref id="r24"><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Li</surname>, <given-names>M.</given-names></string-name>, <string-name name-style="western"><surname>Sun</surname>, <given-names>T.</given-names></string-name>, &amp; <string-name name-style="western"><surname>Zhang</surname>, <given-names>B.</given-names></string-name></person-group> (<year>2022</year>). <article-title>autoFC: An R package for automatic item pairing in forced-choice test construction.</article-title> <source>Applied Psychological Measurement</source>, <volume>46</volume>(<issue>1</issue>), <fpage>70</fpage>–<lpage>72</lpage>. <pub-id pub-id-type="doi">10.1177/01466216211051726</pub-id><pub-id pub-id-type="pmid">34898749</pub-id></mixed-citation></ref>
<ref id="r25"><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Li</surname>, <given-names>M.</given-names></string-name>, <string-name name-style="western"><surname>Zhang</surname>, <given-names>B.</given-names></string-name>, <string-name name-style="western"><surname>Li</surname>, <given-names>L.</given-names></string-name>, <string-name name-style="western"><surname>Sun</surname>, <given-names>T.</given-names></string-name>, &amp; <string-name name-style="western"><surname>Brown</surname>, <given-names>A.</given-names></string-name></person-group> (<year>2025</year>). <article-title>Mixed-keying or desirability-matching in the construction of forced-choice measures? An empirical investigation and practical recommendations.</article-title> <source>Organizational Research Methods</source>, <volume>28</volume>(<issue>2</issue>), <fpage>296</fpage>–<lpage>329</lpage>. <pub-id pub-id-type="doi">10.1177/10944281241229784</pub-id></mixed-citation></ref>
<ref id="r26"><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Lin</surname>, <given-names>Y.</given-names></string-name>, &amp; <string-name name-style="western"><surname>Brown</surname>, <given-names>A.</given-names></string-name></person-group> (<year>2017</year>). <article-title>Influence of context on item parameters in forced-choice personality assessments.</article-title> <source>Educational and Psychological Measurement</source>, <volume>77</volume>(<issue>3</issue>), <fpage>389</fpage>–<lpage>414</lpage>. <pub-id pub-id-type="doi">10.1177/0013164416646162</pub-id><pub-id pub-id-type="pmid">29795919</pub-id></mixed-citation></ref>
<ref id="r27"><mixed-citation publication-type="book">McCrae, R. R., Herbst, J. H., &amp; Costa, P. T., Jr. (2001). Effects of acquiescence on personality factor structures. In R. Riemann, F. Ostendorf, &amp; F. Spinath (Eds.), <italic>Personality and temperament: Genetics, evolution, and structure</italic> (pp. 217–231). Pabst Science.</mixed-citation></ref>
<ref id="r28"><mixed-citation publication-type="web">Morillo, D. (2018). <italic>Item response theory models for forced-choice questionnaires</italic> (Doctoral dissertation, Universidad Autónoma de Madrid). <ext-link ext-link-type="uri" xlink:href="https://repositorio.uam.es/server/api/core/bitstreams/99f23341-5061-4586-a976-3a631ec7721d/content">https://repositorio.uam.es/server/api/core/bitstreams/99f23341-5061-4586-a976-3a631ec7721d/content</ext-link></mixed-citation></ref>
<ref id="r29"><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Morillo</surname>, <given-names>D.</given-names></string-name>, <string-name name-style="western"><surname>Abad</surname>, <given-names>F. J.</given-names></string-name>, <string-name name-style="western"><surname>Kreitchmann</surname>, <given-names>R. S.</given-names></string-name>, <string-name name-style="western"><surname>Leenen</surname>, <given-names>I.</given-names></string-name>, <string-name name-style="western"><surname>Hontangas</surname>, <given-names>P.</given-names></string-name>, &amp; <string-name name-style="western"><surname>Ponsoda</surname>, <given-names>V.</given-names></string-name></person-group> (<year>2019</year>). <article-title>The journey from Likert to forced-choice questionnaires: Evidence of the invariance of item parameters.</article-title> <source>Journal of Work and Organizational Psychology</source>, <volume>35</volume>(<issue>2</issue>), <fpage>75</fpage>–<lpage>83</lpage>. <pub-id pub-id-type="doi">10.5093/jwop2019a11</pub-id></mixed-citation></ref>
<ref id="r30"><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Morillo</surname>, <given-names>D.</given-names></string-name>, <string-name name-style="western"><surname>Leenen</surname>, <given-names>I.</given-names></string-name>, <string-name name-style="western"><surname>Abad</surname>, <given-names>F. J.</given-names></string-name>, <string-name name-style="western"><surname>Hontangas</surname>, <given-names>P.</given-names></string-name>, <string-name name-style="western"><surname>de la Torre</surname>, <given-names>J.</given-names></string-name>, &amp; <string-name name-style="western"><surname>Ponsoda</surname>, <given-names>V.</given-names></string-name></person-group> (<year>2016</year>). <article-title>A dominance variant under the multiunidimensional pairwise-preference framework: Model formulation and Markov chain Monte Carlo estimation.</article-title> <source>Applied Psychological Measurement</source>, <volume>40</volume>(<issue>7</issue>), <fpage>500</fpage>–<lpage>516</lpage>. <pub-id pub-id-type="doi">10.1177/0146621616662226</pub-id><pub-id pub-id-type="pmid">29881066</pub-id></mixed-citation></ref>
<ref id="r31"><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Pavlov</surname>, <given-names>G.</given-names></string-name>, <string-name name-style="western"><surname>Shi</surname>, <given-names>D.</given-names></string-name>, <string-name name-style="western"><surname>Maydeu-Olivares</surname>, <given-names>A.</given-names></string-name>, &amp; <string-name name-style="western"><surname>Fairchild</surname>, <given-names>A.</given-names></string-name></person-group> (<year>2021</year>). <article-title>Item desirability matching in forced-choice test construction.</article-title> <source>Personality and Individual Differences</source>, <volume>183</volume>, <elocation-id>111114</elocation-id>. <pub-id pub-id-type="doi">10.1016/j.paid.2021.111114</pub-id></mixed-citation></ref>
<ref id="r32"><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Salgado</surname>, <given-names>J. F.</given-names></string-name></person-group> (<year>2016</year>). <article-title>A theoretical model of psychometric effects of faking on assessment procedures: Empirical findings and implications for personality at work.</article-title> <source>International Journal of Selection and Assessment</source>, <volume>24</volume>(<issue>3</issue>), <fpage>209</fpage>–<lpage>228</lpage>. <pub-id pub-id-type="doi">10.1111/ijsa.12142</pub-id></mixed-citation></ref>
<ref id="r33"><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Salgado</surname>, <given-names>J. F.</given-names></string-name>, &amp; <string-name name-style="western"><surname>Táuriz</surname>, <given-names>G.</given-names></string-name></person-group> (<year>2014</year>). <article-title>The Five-Factor Model, forced-choice personality inventories and performance: A comprehensive meta-analysis of academic and occupational validity studies.</article-title> <source>European Journal of Work and Organizational Psychology</source>, <volume>23</volume>(<issue>1</issue>), <fpage>3</fpage>–<lpage>30</lpage>. <pub-id pub-id-type="doi">10.1080/1359432X.2012.716198</pub-id></mixed-citation></ref>
<ref id="r34"><mixed-citation publication-type="data">Sorrel, M. A., Escudero, S., Abad, F. J., &amp; Kreitchmann, R. S. (2026). <italic>A comparison of optimization algorithms for forced-choice questionnaire assembly</italic> [OSF project page containing R code/functions used in the study, study data, document detailing all study algorithm specifications]. Open Science Framework. <ext-link ext-link-type="uri" xlink:href="https://osf.io/2ubgh/overview">https://osf.io/2ubgh/overview</ext-link></mixed-citation></ref>
<ref id="r35"><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Stark</surname>, <given-names>S.</given-names></string-name>, <string-name name-style="western"><surname>Chernyshenko</surname>, <given-names>O. S.</given-names></string-name>, &amp; <string-name name-style="western"><surname>Drasgow</surname>, <given-names>F.</given-names></string-name></person-group> (<year>2005</year>). <article-title>An IRT approach to constructing and scoring pairwise preference items involving stimuli on different dimensions: The multi-unidimensional pairwise-preference model.</article-title> <source>Applied Psychological Measurement</source>, <volume>29</volume>(<issue>3</issue>), <fpage>184</fpage>–<lpage>203</lpage>. <pub-id pub-id-type="doi">10.1177/0146621604273988</pub-id></mixed-citation></ref>
</ref-list>
	<sec sec-type="data-availability" id="das"><title>Data Availability</title>
		<p>The data and code used in the manuscript are publicly available at <xref ref-type="bibr" rid="r34">Sorrel et al. (2026)</xref>. Supplementary tables and figures for this study can be found at <xref ref-type="bibr" rid="r10">Escudero et al. (2026)</xref>.</p>
	</sec>	

	
	
	
	<sec sec-type="supplementary-material" id="sp1"><title>Supplementary Materials</title>
		<table-wrap position="anchor" content-type="supplementary-materials">
			<table frame="void" style="background-#f3f3f3 nobreak">
				<col width="60%" align="left"/>
				<col width="40%" align="left"/>
				<thead>
					<tr>
						<th>Type of supplementary material</th>
						<th>Availability/Access</th>
					</tr></thead>
				<tbody>
					<tr>
						<th colspan="2">Data</th>						
					</tr>
					<tr>
						<td>IPIP300.por</td>		
						<td><xref ref-type="bibr" rid="r34">Sorrel et al. (2026)</xref></td>
					</tr>
					<tr style="grey-border-top-dashed">
						<th colspan="2">Code</th>
					</tr>
					<tr>
						<td>Empirical illustration - R code</td>
						<td><xref ref-type="bibr" rid="r34">Sorrel et al. (2026)</xref></td>
					</tr>
					<tr>
						<td>Simulation functions - R code</td>
						<td><xref ref-type="bibr" rid="r34">Sorrel et al. (2026)</xref></td>
					</tr>
					<tr>
						<td>block Assembly NHBSA - R code</td>
						<td><xref ref-type="bibr" rid="r34">Sorrel et al. (2026)</xref></td>
					</tr>
					<tr>
						<td>NHBSA Functions - R code</td>
						<td><xref ref-type="bibr" rid="r34">Sorrel et al. (2026)</xref></td>
					</tr>
					<tr>
						<td>NHBSA - R code</td>
						<td><xref ref-type="bibr" rid="r34">Sorrel et al. (2026)</xref></td>
					</tr>
					<tr>
						<td>select Blocks - R code</td>
						<td><xref ref-type="bibr" rid="r34">Sorrel et al. (2026)</xref></td>
					</tr>
					<tr style="grey-border-top-dashed">
						<th colspan="2">Material</th>
					</tr>
					<tr>
						<td>IPIP Neo ItemKey</td>
						<td><xref ref-type="bibr" rid="r34">Sorrel et al. (2026)</xref></td>
					</tr>
					<tr style="grey-border-top-dashed">
						<th colspan="2">Study/Analysis preregistration</th>
					</tr>	
					<tr>
						<td>Study was not preregistered</td>
						<td>—</td>
					</tr>
					<tr style="grey-border-top-dashed">
						<th colspan="2">Other</th>
					</tr>
					<tr>
						<td>Algorithm Specifications</td>
						<td><xref ref-type="bibr" rid="r34">Sorrel et al. (2026)</xref></td>
					</tr>
					<tr>
						<td>Supplementary Tables and Figures</td>
						<td><xref ref-type="bibr" rid="r10">Escudero et al. (2026)</xref></td>
					</tr>	
				</tbody>
			</table> </table-wrap>
	</sec>
			

<ack>
<p>The authors have no additional (i.e., non-financial) support to report.</p>
</ack>
</back>
</article>
