<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article
  PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD with MathML3 v1.2 20190208//EN" "JATS-journalpublishing1-mathml3.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:ali="http://www.niso.org/schemas/ali/1.0/" article-type="research-article" dtd-version="1.2" xml:lang="en">
<front>
<journal-meta><journal-id journal-id-type="publisher-id">METH</journal-id><journal-id journal-id-type="nlm-ta">Methodology</journal-id>
<journal-title-group>
<journal-title>Methodology</journal-title><abbrev-journal-title abbrev-type="pubmed">Methodology</abbrev-journal-title>
</journal-title-group>
<issn pub-type="ppub">1614-1881</issn>
<issn pub-type="epub">1614-2241</issn>
<publisher><publisher-name>PsychOpen</publisher-name></publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="publisher-id">meth.17705</article-id>
<article-id pub-id-type="doi">10.5964/meth.17705</article-id>
<article-categories>
<subj-group subj-group-type="heading"><subject>Original Article</subject></subj-group>

<subj-group subj-group-type="badge">
<subject>Materials</subject>
<subject>Preregistration</subject>
</subj-group>

</article-categories>
<title-group>
<article-title>Evaluate What Is Claimed to Be Confirmed: Initial Version of a Falsification Assessment Form (FAF)</article-title>
<alt-title alt-title-type="right-running">A Falsification Assessment Form</alt-title>
<alt-title specific-use="APA-reference-style" xml:lang="en">Evaluate what is claimed to be confirmed: Initial version of a Falsification Assessment Form (FAF)</alt-title>
</title-group>
<contrib-group>
<contrib contrib-type="author" corresp="yes"><contrib-id contrib-id-type="orcid" authenticated="false">https://orcid.org/0000-0001-7646-8265</contrib-id><name name-style="western"><surname>Höfler</surname><given-names>Michael</given-names></name><xref ref-type="corresp" rid="cor1">*</xref><xref ref-type="aff" rid="aff1"><sup>1</sup></xref></contrib>
	<contrib contrib-type="author"><contrib-id contrib-id-type="orcid" authenticated="false">https://orcid.org/0000-0002-1612-3932</contrib-id><name name-style="western"><surname>Kräplin</surname><given-names>Anja</given-names></name><xref ref-type="aff" rid="aff2"><sup>2</sup></xref></contrib>
<contrib contrib-type="author"><contrib-id contrib-id-type="orcid" authenticated="false">https://orcid.org/0000-0002-0540-3998</contrib-id><name name-style="western"><surname>Elsherif</surname><given-names>Mahmoud Medhat</given-names></name><xref ref-type="aff" rid="aff3"><sup>3</sup></xref><xref ref-type="aff" rid="aff4"><sup>4</sup></xref></contrib>
<contrib contrib-type="author"><name name-style="western"><surname>Schepke</surname><given-names>Moritz</given-names></name><xref ref-type="aff" rid="aff1"><sup>1</sup></xref></contrib>
<contrib contrib-type="author"><contrib-id contrib-id-type="orcid" authenticated="false">https://orcid.org/0000-0002-7685-1034</contrib-id><name name-style="western"><surname>Montefinese</surname><given-names>Maria</given-names></name><xref ref-type="aff" rid="aff5"><sup>5</sup></xref></contrib>
<contrib contrib-type="author"><contrib-id contrib-id-type="orcid" authenticated="false">https://orcid.org/0000-0001-7487-3398</contrib-id><name name-style="western"><surname>Seetahul</surname><given-names>Yashvin</given-names></name><xref ref-type="aff" rid="aff6"><sup>6</sup></xref></contrib>
<contrib contrib-type="author"><contrib-id contrib-id-type="orcid" authenticated="false">https://orcid.org/0000-0002-9367-6987</contrib-id><name name-style="western"><surname>Sætrevik</surname><given-names>Bjørn</given-names></name><xref ref-type="aff" rid="aff7"><sup>7</sup></xref></contrib>
<contrib contrib-type="author"><name name-style="western"><surname>Peikert</surname><given-names>Aaron</given-names></name><xref ref-type="aff" rid="aff8"><sup>8</sup></xref></contrib>
<contrib contrib-type="author"><name name-style="western"><surname>Varga</surname><given-names>Marton A.</given-names></name><xref ref-type="aff" rid="aff9"><sup>9</sup></xref></contrib>
<contrib contrib-type="author"><contrib-id contrib-id-type="orcid" authenticated="false">https://orcid.org/0000-0003-2121-5177</contrib-id><name name-style="western"><surname>Wallrich</surname><given-names>Lukas</given-names></name><xref ref-type="aff" rid="aff10"><sup>10</sup></xref></contrib>
<contrib contrib-type="editor">
<name>
	<surname>Nájera Álvarez</surname>
	<given-names>Pablo</given-names>
</name>
<xref ref-type="aff" rid="aff11"/>
</contrib>
<aff id="aff1"><label>1</label><institution content-type="dept">Clinical Psychology and Behavioural Neuroscience, Institute of Clinical Psychology and Psychotherapy</institution>, <institution>Technische Universität Dresden</institution>, <addr-line><city>Dresden</city></addr-line>, <country country="DE">Germany</country></aff>
<aff id="aff2"><label>2</label><institution content-type="dept">Department of Psychiatry and Psychotherapy</institution>, <institution>Technische Universität Dresden</institution>, <addr-line><city>Dresden</city></addr-line>, <country country="DE">Germany</country></aff>
<aff id="aff3"><label>3</label><institution content-type="dept">School of Psychology and Vision Sciences</institution>, <institution>University of Leicester</institution>, <addr-line><city>Leicester</city></addr-line>, <country country="GB">United Kingdom</country></aff>
<aff id="aff4"><label>4</label><institution content-type="dept">School of Psychology</institution>, <institution>University of Birmingham</institution>, <addr-line><city>Birmingham</city></addr-line>, <country country="GB">United Kingdom</country></aff>
<aff id="aff5"><label>5</label><institution content-type="dept">Department of Developmental Psychology and Socialisation</institution>, <institution>University of Padua</institution>, <addr-line><city>Padua</city></addr-line>, <country country="IT">Italy</country></aff>
<aff id="aff6"><label>6</label><institution content-type="dept">Institute for Psychology</institution>, <institution>University of Innsbruck</institution>, <addr-line><city>Innsbruck</city></addr-line>, <country country="AT">Austria</country></aff>
<aff id="aff7"><label>7</label><institution content-type="dept">Department of Psychosocial Science, Faculty of Psychology</institution>, <institution>University of Bergen</institution>, <addr-line><city>Bergen</city></addr-line>, <country country="NO">Norway</country></aff>
<aff id="aff8"><label>8</label><institution>Max Planck Institute for Human Development</institution>, <institution content-type="dept">Center for Lifespan Psychology</institution>, <addr-line><city>Berlin</city></addr-line>, <country country="DE">Germany</country></aff>
<aff id="aff9"><label>9</label><institution>ELTE Eötvös Loránd University</institution>, <institution content-type="dept">Institute of Psychology</institution>, <addr-line><city>Budapest</city></addr-line>, <country country="HU">Hungary</country></aff>
<aff id="aff10"><label>10</label><institution content-type="dept">Birkbeck Business School</institution>, <institution>Birkbeck, University of London</institution>, <addr-line><city>London</city></addr-line>, <country country="GB">United Kingdom</country></aff>
	<aff id="aff11">Universidad Pontificia Comillas, Madrid, <country>Spain</country></aff>
</contrib-group>
<author-notes>
<corresp id="cor1"><label>*</label>Clinical Psychology and Behavioural Neuroscience, Institute of Clinical Psychology and Psychotherapy, Technische Universität Dresden, Dresden, Germany. Chemnitzer Straße 46, 01187 Dresden, Germany. Tel: +49-351-46936921. <email xlink:href="mailto:michael.hoefler@tu-dresden.de">michael.hoefler@tu-dresden.de</email></corresp>
</author-notes>
<pub-date date-type="pub" publication-format="electronic"><day>30</day><month>09</month><year>2025</year></pub-date>
<pub-date date-type="collection" publication-format="electronic"><year>2025</year></pub-date>
<volume>21</volume>
<issue>3</issue>
<fpage>180</fpage>
<lpage>196</lpage>
<history>
<date date-type="received">
<day>15</day>
<month>04</month>
<year>2025</year>
</date>
<date date-type="accepted">
<day>01</day>
<month>09</month>
<year>2025</year>
</date>
</history>
<permissions><copyright-year>2025</copyright-year><copyright-holder>Höfler, Kräplin, Elsherif et al.</copyright-holder><license license-type="open-access" specific-use="CC BY 4.0" xlink:href="https://creativecommons.org/licenses/by/4.0/"><ali:license_ref>https://creativecommons.org/licenses/by/4.0/</ali:license_ref><license-p>This is an open-access article distributed under the terms of the Creative Commons Attribution (CC BY) 4.0 License, which permits unrestricted use, distribution, and reproduction in any medium, provided the original work is properly cited.</license-p></license></permissions>
<abstract>
<p>Scientific claims, and the way they are tested, must be unambiguous, and flexibility must be disclosed. Grounded in Popper’s principle of falsification, we suggest the <italic>Falsification Assessment Form (FAF)</italic>. The form aims to identify ambiguity and undisclosed flexibility in the entire research process with 11 items covering hypothesis formulation, data processing, analysis, and alternative explanations. It also collects information on transparency measures, such as preregistration. The form was developed through consensus among the authors and refined via a collaborative feedback assessment of 19 experts. It is intended for original, quantitative research; it highlights potential issues and requires authors to provide detailed responses. <italic>FAF</italic> is meant to be a structured qualitative audit framework. It can be used to identify concerns in published research, improve the quality of papers during peer review, or guide rigorous study planning from the outset. We open up further refinement and testing of <italic>FAF</italic> to the scientific community.</p>
</abstract>
<kwd-group kwd-group-type="author"><kwd>falsification</kwd><kwd>peer review</kwd><kwd>research assessment</kwd><kwd>research quality</kwd><kwd>research transparency</kwd></kwd-group>

</article-meta>
</front>
<body>
	<sec sec-type="intro" id="intro"><title/>
<p>Science is commonly defended as a self-correcting enterprise (<xref ref-type="bibr" rid="r20">Ioannidis, 2012</xref>). This notion is heavily shaped by <xref ref-type="bibr" rid="r35">Karl Popper’s (1959)</xref> seminal work on falsification. Falsification is considered by many as the most compelling answer to the epistemic problem of how to learn from data (the particular) about underlying phenomena (the general). The common focus on falsifiability, the capacity to demonstrate that false propositions are indeed false, encompasses other, seemingly different approaches to science (<xref ref-type="bibr" rid="r7">Feyerabend, 1993</xref>; <xref ref-type="bibr" rid="r10">Gigerenzer &amp; Marewski, 2015</xref>; <xref ref-type="bibr" rid="r23">Lakens &amp; DeBruine, 2021</xref>; <xref ref-type="bibr" rid="r27">Mayo, 2018</xref>; <xref ref-type="bibr" rid="r29">Meehl, 1978</xref>; <xref ref-type="bibr" rid="r39">Uygun Tunç &amp; Tunç, 2023</xref>). At its core, Popper’s principle asks what observation would refute a hypothesis and then seeks that observation. In confirmatory research, the principle translates to a hypothesis or theory predicting a specific result. Observing this result corroborates the hypothesis, while (repeatedly) observing a different result may falsify it. The research process, therefore, must facilitate the ability for false hypotheses to turn out wrong. Despite widespread endorsement of the principle, numerous authors have highlighted its compromise by poor practices throughout the research process. 
These include vague or ill-defined hypotheses (<xref ref-type="bibr" rid="r5">Devezer &amp; Buzbas, 2021</xref>; <xref ref-type="bibr" rid="r33">Oberauer &amp; Lewandowsky, 2019</xref>), inadequate or under-reported data processing (<xref ref-type="bibr" rid="r25">Loenneker et al., 2024</xref>; <xref ref-type="bibr" rid="r37">Scheel, 2022</xref>), and problematic data analysis (<xref ref-type="bibr" rid="r9">Gigerenzer, 2004</xref>; <xref ref-type="bibr" rid="r30">Nagy et al., 2024</xref>). The key issue is the flexibility with which these procedures are carried out. Without a clear, preregistered prediction, researchers can select results that seemingly support their hypothesis while ignoring others — a practice known as “cherry-picking” (<xref ref-type="bibr" rid="r23">Lakens &amp; DeBruine, 2021</xref>).</p>
<p>Over the past decades, valuable tools for evaluating research have been developed. These assess bias (<xref ref-type="bibr" rid="r40">Viswanathan et al., 2018</xref>), the confidence in a claim (<xref ref-type="bibr" rid="r2">Alipourfard et al., 2021</xref>), its replicability (<xref ref-type="bibr" rid="r8">Fraser et al., 2023</xref>), reporting standards (<xref ref-type="bibr" rid="r3">Appelbaum et al., 2018</xref>) or provide checklists on general research quality (<xref ref-type="bibr" rid="r15">Héroux et al., 2022</xref>; <xref ref-type="bibr" rid="r21">Kerschbaumer et al., 2025</xref>; <xref ref-type="bibr" rid="r32">Nosek et al., 2015</xref>; <xref ref-type="bibr" rid="r41">Wicherts et al., 2016</xref>) and transparency measures (<xref ref-type="bibr" rid="r1">Aczel et al., 2021</xref>; <xref ref-type="bibr" rid="r31">Nanyang Technological University Library, 2023</xref>).</p>
<p>Here, we introduce the <italic>Falsification Assessment Form (FAF)</italic>, a tool designed to evaluate the falsifiability of a published claim on a hypothesis, and to foster falsifiability in the planning stage. Unlike existing tools, <italic>FAF</italic> is based on a single, unifying principle — falsifiability. It refrains from quantifying the impact of the identified issues, a practice that has been criticized for its arbitrariness and high context-dependence in the absence of specific knowledge on how the identified issues correlate with the quantity of interest (<xref ref-type="bibr" rid="r12">Greenland &amp; O’Rourke, 2001</xref>; <xref ref-type="bibr" rid="r14">Herbison et al., 2006</xref>).</p>
<p>Thus, <italic>FAF</italic> aims to operationalize Popper’s seminal idea through a practical form for behavioural, cognitive, social and health sciences. It serves to inform the assessment of a published paper, study planning, manuscript preparation, or manuscript review.</p></sec>
<sec sec-type="methods"><title>Method</title>
<sec><title>Approach of the FAF</title>
<p>The form evaluates the falsifiability of a single claim in support of a hypothesis that is likely to be taken to be confirmatory (e.g., “our study suggests…” or “we found evidence that…”; <xref ref-type="bibr" rid="r19">Höfler et al., 2022</xref>). It is not suitable for inconclusive (e.g., “our study revealed unclear results…”) and exploratory claims (e.g., “we propose the new hypothesis that…”; <xref ref-type="bibr" rid="r19">Höfler et al., 2022</xref>). While flexibility in the research process opens the door to the discovery of novelty in exploratory research, flexibility in confirmatory research must be entirely constrained. A confirmatory claim can be typically extracted from the abstract, discussion, or conclusion of a paper. In the planning phase of a study, <italic>FAF</italic> can guide the choice of rigorous research methods, so that once the study is conducted, the claims made would pass the <italic>FAF</italic> items (except possibly Domain 4, see below).</p>
<p>The form is to be filled out once for every claim in a paper and can be used several times for different claims. For composite claims (e.g., “Therapy A is effective, but Therapy B is not”), it is advisable to evaluate each part separately if their implications for theory building or intervention are not the same (<xref ref-type="bibr" rid="r4">Bender &amp; Lange, 2001</xref>). <italic>FAF</italic> covers original, quantitative research; it does not apply to meta-analyses, re-analyses, or other study types. Additionally, it does not evaluate the hypothesis’s substantive quality and relevance.</p>
<p>Within these limits, <italic>FAF</italic> aims to <italic>uncover as many issues as possible</italic>, using only <italic>a minimum set of straightforward questions</italic>. It returns a list of <italic>potential issues</italic>, which are assumed to be highly context-dependent. The form does not judge these issues but encourages authors and reviewers to address them — ideally before publication. In this sense, <italic>FAF</italic> is meant to be normative. Crucially, it does not constitute a measurement scale or diagnostic instrument requiring high inter-rater reliability, but a structured qualitative audit framework for identifying ambiguities or undisclosed flexibilities that could compromise falsifiability.</p>
<p>Each item contains a broad question paired with an example to illustrate its scope. For example, Item 1.2 asks <italic>“Are there any ambiguities about the meaning of the hypothesis or the conditions under which it is claimed to hold?”</italic> and then explains <italic>“The formulation of a hypothesis must eliminate flexible interpretations. A common instance for flexibility is ambiguity about whether the hypothesis concerns a causal or associational relation. Besides, the hypothesis has to include the conditions under which it holds: the population, materials, stimuli, design procedures and outcomes used.”</italic> A broad question, together with the example, is intended to prompt more specific considerations that may affect how the question is answered. A potential issue then is raised by endorsing a concern, or by highlighting that information is ambiguous or missing, which is to be described in a free text field. In doing so, we expect a paper or its supplementary material to report anything in the research process where flexibility in procedures could undermine falsifiability. We also expect the reporting to be <italic>verifiable</italic>, wherefore the form ends by evaluating if these procedures have been carried out as reported through a final domain on the transparency measures used (<xref ref-type="bibr" rid="r22">Lakens, 2019</xref>; <xref ref-type="bibr" rid="r24">Lakens &amp; Mesquida, 2024</xref>).</p>
<p>Although the items on the wording of a hypothesis, data processing, data analysis (Domains 1 – 3) and transparency measures (Domain 5) are designed to elicit a fairly objective assessment, it is not a concern if some users of the form identify more potential issues than others. <italic>FAF</italic> aims to identify as many of them as possible, not to assert that they are all critical. Domain 4, <italic>“Alternative explanations for the claim, not addressed so far”</italic> includes a single free-text item in which any alternative explanations can be listed. This considers the well-known fact that there are always several possibilities for why a hypothesis may be wrong, and a single study can never test all of them (e.g., the effect does not exist, the measurement instrument is not valid; <xref ref-type="bibr" rid="r28">Meehl, 1967</xref>; <xref ref-type="bibr" rid="r36">Rakover, 2003</xref>). We therefore expect that this item will only remain blank if a paper presents a set of studies that, together, rigorously test all plausible alternative assumptions. Finally, the last domain <italic>“Transparency measures”</italic> asks in detail about preregistration and registered reports, specifically whether they have been timestamped before data access. Other transparency measures are listed in checklist format: the use of open data, open materials, open analysis (code), a reproducibility check, the 21-word solution, or “anything else”. For each transparency measure, it is asked whether it was implemented <italic>“in a way that is insufficient to assess falsifiability” </italic>and, if so, a free text explanation of why it is insufficient is requested.</p>
<p>Table 1 provides a full list of each domain’s items, i.e., items in FAF (Version 1.0).</p>
<table-wrap id="t1A" position="anchor" orientation="portrait">
<label>Table 1A</label><caption><title>Domain 1: Content of the Hypothesis</title></caption>
<table frame="hsides" rules="groups">
<col width="15%" align="center"/>
<col width="85%" align="left"/>
<thead>
<tr>
<th style="indent">Item</th>
<th style="indent">Content</th>
</tr>
</thead>
<tbody>
<tr>
<td>
<list id="L1A1.1a" list-type="simple"><list-item><p>1.1</p></list-item></list></td>
<td><list id="L1A1.1b" list-type="simple"><list-item><p>What is the hypothesis that the paper claims (concludes) to confirm?</p></list-item></list></td>
</tr>
<tr>
<td>
<list id="L1A1.2a" list-type="simple"><list-item><p>1.2</p></list-item></list></td>
<td><list id="L1A1.2b" list-type="simple"><list-item><p>Are there any ambiguities about the meaning of the hypothesis or the conditions under which it is claimed to hold?</p></list-item></list></td>
</tr>
</tbody>
</table>
</table-wrap>
<table-wrap id="t1B" position="anchor" orientation="portrait">
<label>Table 1B</label><caption><title>Domain 2: Data Processing, Choice, and Coding of Variables That Entered the Analysis</title></caption>
<table frame="hsides" rules="groups">
<col width="15%" align="center"/>
<col width="85%" align="left"/>
<thead>
<tr>
	<th style="indent">Item</th>
	<th style="indent">Content</th>
</tr>
</thead>
<tbody>
<tr>
<td><list id="L1b1.1a" list-type="simple"><list-item><p>2.1</p></list-item></list></td>	
<td><list id="L1b1.2a" list-type="simple"><list-item><p>Are there undisclosed analytic flexibilities in the data processing, starting from the raw data, selecting measurements, aggregating them into variables (e.g. scales), transforming and categorizing the variables before conducting statistical tests?</p></list-item>
<list-item><p>• Flexibility in data processing from the raw material (e.g., videos, questionnaire items).</p></list-item>
<list-item><p>• Flexibility in the selection or categorization of variables (e.g., dichotomization of actually interval-scaled variables, use of cut-offs, exclusion of items on scales, choice between multiple measures of the same construct).</p></list-item>
<list-item><p>• Flexibility in the exclusion of individuals (e.g. as outliers or inattentive respondents).</p></list-item></list></td></tr>
</tbody>
</table>
</table-wrap>
<table-wrap id="t1C" position="anchor" orientation="portrait">
<label>Table 1C</label><caption><title>Domain 3: Data Analysis and Interpretation</title></caption>
<table frame="hsides" rules="groups">
<col width="15%" align="center"/>
<col width="85%" align="left"/>
<thead>
<tr>
	<th style="indent">Item</th>
	<th style="indent">Content</th>
</tr>
</thead>
<tbody>
<tr>
<td><list id="L1c1.1a" list-type="simple"><list-item><p>3.1</p></list-item></list></td>
<td><list id="L1c1.1b" list-type="simple"><list-item><p>Is there ambiguity about which of the analyses carried out in the paper relate to the prediction made by the hypothesis and justify the claim?</p></list-item></list></td>
</tr>
<tr>
<td><list id="L1c1.2a" list-type="simple"><list-item><p>3.2</p></list-item></list></td>
<td><list id="L1c1.2b" list-type="simple"><list-item><p>Is it unclear how the prediction leads to exactly the analysis (analyses) that were conducted?</p></list-item></list></td>
</tr>
<tr>
<td><list id="L1c1.3a" list-type="simple"><list-item><p>3.3</p></list-item></list></td>
<td><list id="L1c1.3b" list-type="simple"><list-item><p>Is it unclear what results, other than those reported, would have led to the opposite conclusion on the hypothesis? What is the decision rule? How could different results lead to supporting instead of rejecting a hypothesis, or conversely, lead to rejecting rather than supporting a hypothesis?</p></list-item></list></td>
</tr>
<tr>
<td><list id="L1c1.4a" list-type="simple"><list-item><p>3.4</p></list-item></list></td>
<td><list id="L1c1.4b" list-type="simple"><list-item><p>If multiple analyses were conducted to test the hypothesis, the results may be combined in different ways to decide on the overall confirmation of the hypothesis. How did the paper combine results?</p></list-item></list></td>
</tr>
<tr>
<td><list id="L1c1.5a" list-type="simple"><list-item><p>3.5</p></list-item></list></td>
<td><list id="L1c1.5b" list-type="simple"><list-item><p>Among all the assumptions in the statistical test or model, are there any that are clearly inaccurate and favour the claimed hypothesis?</p></list-item></list></td>
</tr>
<tr>
<td><list id="L1c1.6a" list-type="simple"><list-item><p>3.6</p></list-item></list></td>
<td><list id="L1c1.6b" list-type="simple"><list-item><p>Are there parts of the interpretation of the results (as stated in the claim) that have not been explicitly tested for? For instance, is the claim on the hypothesis based only on a crude comparison of p-values?</p></list-item></list></td>
</tr>
</tbody>
</table>
</table-wrap>
<table-wrap id="t1D" position="anchor" orientation="portrait">
<label>Table 1D</label><caption><title>Domain 4: Alternative Explanations for the Claim, Not Addressed So Far</title></caption>
<table frame="hsides" rules="groups">
<col width="15%" align="center"/>
<col width="85%" align="left"/>
<thead>
<tr>
	<th style="indent">Item</th>
	<th style="indent">Content</th>
</tr>
</thead>
<tbody>
<tr>
<td><list id="L1D1.1a" list-type="simple"><list-item><p>4.1</p></list-item></list></td>
<td><list id="L1D1.1b" list-type="simple"><list-item><p>What alternative explanations (not addressed in the previous domains) could account for the finding if the hypothesis was false? List explanations that were not sufficiently addressed.</p></list-item></list></td>
</tr>
</tbody>
</table>
</table-wrap>
<table-wrap id="t1E" position="anchor" orientation="portrait">
<label>Table 1E</label><caption><title>List of Transparency Measures Used</title></caption>
<table frame="hsides" rules="groups">
<col width="100%" align="left"/>
<thead>
<tr>
	<th style="indent">Item</th>
</tr>
</thead>
<tbody>
<tr><td><list list-type="simple"><list-item><p>Was a preregistration or Stage 1 registered report done? In case of preregistration, was it before access to data, verifiable by a time-stamp for either preregistration or data-collection?</p></list-item></list></td></tr>
<tr style="background-lightblue; white-border-top; white-border-bottom"><th>Preregistration or Stage 1 registered report that mentions:</th></tr>
<tr><td><list list-type="simple">
<list-item><p><list list-type="bullet"><list-item><p>a stopping rule for data collection (by date or sample size achieved).</p></list-item>
<list-item><p>the hypothesis and the conditions under which it is claimed to hold.</p></list-item>
<list-item><p>raw data processing and computation of all relevant variables.</p></list-item>
<list-item><p>inclusion and exclusion criteria.</p></list-item>
<list-item><p>the analyses that exactly inform the decision on the hypothesis.</p></list-item>
<list-item><p>anything else.</p></list-item></list></p></list-item></list></td></tr>
<tr style="background-lightblue; white-border-top; white-border-bottom"><th>Checklist:</th></tr>
<tr><td><list list-type="simple">
<list-item><p><list list-type="bullet"><list-item><p>Open data.</p></list-item>
<list-item><p>Open materials.</p></list-item>
<list-item><p>Open analysis/notebook/code.</p></list-item>
<list-item><p>Reproducibility check.</p></list-item>
<list-item><p>Authors have confirmed “We report how we determined our sample size, all data exclusions (if any), all manipulations, and all measures in the study.” (the 21-word solution).</p></list-item>
<list-item><p>Anything else?</p></list-item></list></p></list-item></list></td></tr>
</tbody>
</table>
</table-wrap>
<p>After completing the form, <italic>FAF</italic> returns the following results: The extracted claim with paper information (including title and year of publication), a domain-wise listing of “<italic>Identified issues of the claim which might have impaired falsifiability and require consideration</italic>” and a summary of “<italic>transparency measures used</italic>”, which might include identified flexibilities, e.g., in the preregistration, that remained undisclosed.</p></sec>
<sec><title>Implementation</title>
<p><italic>FAF</italic> was implemented as a Google Sheet (<xref ref-type="bibr" rid="r6">FAF Research Group, 2025</xref>). It consists of the sheets <italic>“Background”</italic> (including “<italic>Instructions</italic>”), <italic>“Form”</italic> and <italic>“Results”</italic>. Clicking the form link creates a private copy, ensuring that the data is only stored in one’s private Google account. The results sheet can be stored in another spreadsheet or PDF format (<xref ref-type="bibr" rid="r18">Höfler, Kräplin, Elsherif et al., 2025</xref>).</p></sec>
<sec><title>Evolution of the Form</title>
<p>MH had the initial idea in late 2023 to create a form that would shed light on how well a scientific claim could have turned out differently. He drafted a concept and initial items and preregistered the form on the Open Science Framework (<xref ref-type="bibr" rid="r16">Höfler, 2023</xref>). AK piloted the items by reviewing a couple of papers. MH presented the idea at the SIPS (Society for the Improvement of Psychological Science) conference in June 2023, in person and online. Fourteen colleagues joined the project at that point, and a first draft of the <italic>FAF</italic> was agreed upon by consensus in October 2024, after several online meetings. <italic>FAF</italic> was then implemented on Google Sheets by MS, who then joined the project. The ten authors of this paper have remained involved in the project ever since.</p></sec>
<sec><title>Collegial Feedback Assessment</title>
<p>After completing the draft, we decided to collect feedback on the form from a wider group of experts. In doing so, we aimed to gauge how the form is received by experts and, above all, to identify any major inaccuracies and incomprehensibilities. The feedback form included the following items:</p>
<list id="L1" list-type="bullet">
<list-item>
<p><italic>“Have you filled in the Falsification Assessment Form before answering this feedback sheet?”</italic> (“<italic>Yes</italic>”/ “<italic>No answer</italic>”/ “<italic>No</italic>”).</p></list-item>
<list-item>
<p><italic>“What is your general impression of the form, its purpose and approach and structure?”</italic> (Free text assessment).</p></list-item>
<list-item>
<p><italic>“In what context have you used or would you consider using the form?”</italic> (Multiple options allowed).</p>
<list id="L2" list-type="bullet">
<list-item>
<p><italic>“Assessing a published paper.”</italic></p></list-item>
<list-item>
<p><italic>“Reviewing a paper.”</italic></p></list-item>
<list-item>
<p><italic>“Planning research.”</italic></p></list-item>
<list-item>
<p><italic>“Others (specify).”</italic></p></list-item>
</list></list-item>
<list-item>
<p><italic>“Do you have thoughts or suggestions about specific items? Please indicate the number or topic of an item.”</italic> (Free text assessment).</p></list-item>
<list-item>
<p><italic>“Should you be inclined to allow us to contact you for a more in-depth discussion of your suggestions, please leave your email address. This will assist us in following up on particularly noteworthy but unclear ideas.”</italic></p></list-item>
</list>
<p>We decided to evaluate these questions with a collegial feedback assessment. This anonymous format was chosen to encourage open and honest responses while protecting participant confidentiality. As a result, no identifying information about the respondents was collected. For the development of the initial version of <italic>FAF</italic>, we considered it sufficient that all participants were recognized as experts, as each was personally invited by a member of the <italic>FAF</italic> project. These invitations were made independently to ensure at least a minimal degree of diversity in perspectives. The <italic>FAF</italic> members were instructed to select experts based on predefined criteria: <italic>“You consider somebody to be an expert in meta-science or methods (based on their publications or teaching).”</italic> For group invitations, the criteria included: <italic>“Open science initiatives, meta-science groups, teams working to improve scientific practices and methods, and journal editors.”</italic> In addition, members were explicitly instructed not to invite anyone who had previously been involved in the project.</p>
<p>Feedback was provided anonymously and no data beyond the responses to the survey items were collected or stored. However, the final item gave participants the option to leave their email address if they wished to discuss their input further. By doing so, they voluntarily waived their anonymity to ensure their concerns could be properly addressed. Invitations to participate were sent out between 3 and 31 December 2024, and feedback was collected until 31 January 2025. Eighty-five colleagues from psychology, 27 colleagues from other disciplines, and 13 groups were invited by email. For data protection reasons and since each member sent out invitations, we did not create a central database of invitees. Therefore, some overlap in the invitations may have occurred and the number of unique recipients may be slightly lower.</p></sec></sec>
<sec sec-type="results"><title>Results</title>
<p>A total of 19 responses were returned. Five respondents confirmed that they had completed the <italic>FAF</italic> before providing feedback. However, as all comments were phrased in a way that suggests familiarity with the <italic>FAF</italic>, no responses were excluded. Free text responses on the item “<italic>general impression</italic>” ranged from full support for the form and its content, to complete disagreement. Eight out of nineteen indicated that the form is too long or complex. Other points of criticism included ambiguity about the form’s purpose and, in the “<italic>Suggestions for specific items</italic>” section, ambiguity about how individual items related to falsifiability, as well as concerns about the content and wording of specific items.</p>
<p>When asked what they would consider using the form for, the following frequencies were reported (multiple responses allowed): 5 for “<italic>Reviewing a paper</italic>” and “<italic>planning research</italic>”, 4 for “<italic>Evaluating a published paper</italic>”, 1 for “<italic>Teaching</italic>”, 6 did not respond and 1 stated that she or he would not use it at all.</p>
<p>The authors of this paper reviewed all the comments before deciding how to revise the <italic>FAF</italic>.</p>
<p>No formal criteria were used to decide which items to retain, revise, or remove; instead, we aimed to remain open to all suggestions and evaluated each on its merits. Through several online meetings, we discussed the comments in detail and reached decisions by group consensus. There was unanimous agreement to streamline and condense the form and to clarify its primary objective. Domain 1 (“<italic>Claim content</italic>”) was reduced from eight items to three. Domain 3 (“<italic>Data analysis and interpretation</italic>”) was shortened from 13 items to six. As Domain 2 (a simple checklist for “<italic>Data processing</italic>”) received little feedback, it was only slightly shortened. Domain 4 (“<italic>Alternative assumptions</italic>”) and the checklist “<italic>List of Transparency Measures Used</italic>” remained essentially unchanged. In addition, the background text, the instructions and all remaining items were thoroughly revised, condensed, and clarified. The streamlining and rewording were largely guided by one feedback suggestion to focus more on the prediction that a hypothesis makes. For example, in Domain 1, the item <italic>“Is there any doubt about the theoretical foundation of the hypothesis?”</italic> was deemed no longer necessary. We also removed the item <italic>“Is there any doubt that the hypothesis was fully declared without being affected by the data?”</italic> because we believe this issue is already addressed — more efficiently — in the final domain on transparency measures, with some modifications to the wording in that section. Also, several conceptual terms were hyperlinked to entries in the Framework for Open and Reproducible Research Training (FORRT) glossary (<xref ref-type="bibr" rid="r34">Parsons et al., 2022</xref>), where they are explained in more detail.</p>
	<p>Following these revisions, the nine out of 19 colleagues who had provided their email address were contacted again and asked if they had any “<italic>further suggestions</italic>” for the revised version of the form. Seven of them responded to the email: three of them expressed support for the revision, three were at least not critical and one still held substantially different views. No further changes were made after this. All previous versions of <italic>FAF</italic> (0.1 to 0.5), along with a summary file documenting the changes from Version 0.5 (pre-feedback) to Version 1.0 (post-collegial feedback assessment), titled “<italic>FAF Changes After Collegial Feedback Assessment</italic>”, are available in the <italic>FAF</italic> Open Science Framework (OSF) repository under the folder “<italic>Versions of FAF</italic>” (<xref ref-type="bibr" rid="r17">Höfler, Kräplin, Varga et al., 2025</xref>).</p>
<p>The repository also contains some examples of claims evaluated using <italic>FAF</italic>. These include five evaluations by the authors of this paper of the claim “School bullying results in poor psychological conditions”, based on a survey of 95,545 Chinese school students (<xref ref-type="bibr" rid="r42">Zhao et al., 2024</xref>). While there was general agreement on most binary items, notable differences emerged regarding whether the hypothesis made a clear prediction (Item 1.2) and whether the results were interpreted beyond what was warranted (Item 3.6). These discrepancies stemmed from differing views on the clarity of the target population and whether the claim implied causality. Differences in free-text assessments offered distinct descriptions of the concerns identified.</p>
<p>These results show that multiple raters identify more potential problems, which naturally and desirably leads to the need for more justification.</p></sec>
<sec sec-type="discussion"><title>Discussion</title>
	<p>With <italic>FAF</italic>, we have introduced a tool that seeks to uphold <xref ref-type="bibr" rid="r35">Popper’s (1959)</xref> longstanding principle of falsifiability in practices throughout the research process. The tool scrutinizes the falsifiability of a paper's confirmatory claim about a hypothesis by posing items in 11 sections to be answered. Ideally, the tool informs the study planning, design, and analysis, so that such a claim is later justified if the hypothesis is true and the results turn out as predicted.</p>
<sec><title>Some Illustrative Use Cases</title>
<p>Before discussing <italic>FAF</italic> in detail, some examples of its use are given in <xref ref-type="table" rid="t2">Table 2</xref>.</p>
<table-wrap id="t2" position="anchor" orientation="portrait">
<label>Table 2</label><caption><title>Use Cases for FAF</title></caption>
	<table frame="hsides" rules="groups" style="striped-#f3f3f3">
<col width="" align="left"/>
<col width="" align="left"/>
<thead>
<tr>
<th>Type of use</th>
<th>Example</th>
</tr>
</thead>
<tbody>
<tr>
<td>Assessing a claim in a published paper</td>
<td>A paper claims that “Tai Chi practice enhances self-esteem.” While plausible, the reader questions how easily this conclusion might have turned out otherwise. Using <italic>FAF</italic>, she evaluates how robust the evidence is for this claim.</td>
</tr>
<tr>
<td>Reviewing a paper</td>
<td>A reviewer notices inconsistent standards in how he evaluates manuscripts. He adopts <italic>FAF</italic> to systematize and standardize his assessments.</td>
</tr>
<tr>
<td>Revising a paper</td>
<td>An author uses <italic>FAF</italic> to anticipate reviewer concerns and revise the manuscript accordingly, especially in how the study procedures are reported. While study specifications cannot be changed post hoc, reporting can be clarified.</td>
</tr>
<tr>
<td>Planning a study</td>
<td>A researcher aims to generate strong evidence for a hypothesized effect. She uses all <italic>FAF</italic> domains during planning to minimize potential issues, ensuring the hypothesis is clearly formulated (Domain 1), data processing and analysis are predefined (Domains 2 &amp; 3), and the process is fully transparent (last domain). Alternative explanations (Domain 4) are either tested or explicitly acknowledged.</td>
</tr>
<tr>
<td>Research methods teaching</td>
<td>Students are asked to formulate a hypothesis and design a study using <italic>FAF</italic> to ensure that it could be falsified. This encourages critical thinking and highlights that while issues can be addressed in all domains, Domain 4 (“Alternative explanations”) inherently remains open in single studies.</td>
</tr>
</tbody>
</table>
</table-wrap></sec>
<sec><title>Qualitative Versus Quantitative Assessment of Falsifiability</title>
<p><italic>FAF</italic> is a qualitative tool. It identifies <italic>potential issues</italic> without asserting that a real issue has been brought up. This reflects that the <italic>FAF</italic> items address topics whose impact on falsifiability may depend on context and subjective judgement. For example, the “<italic>meaning of a hypothesis</italic>” may be unclear to one researcher but not to another, or clarity may be achieved by explanation in response to completing the form (e.g., an unpublished claim on a hypothesis can be revised to achieve clarity). Potential issues flagged by <italic>FAF</italic> may be resolved through additional explanation provided in response to filling out the form. While we believe that <italic>FAF</italic> meets its goal if it stimulates the impetus to address such instances, it is natural to ask whether its qualitative approach could be extended towards a quantitative scoring of a claim's trustworthiness.</p>
<p>The quantitative counterpart to falsifiability is the <italic>severity of a test</italic>, the probability that a false hypothesis is shown to be false (<xref ref-type="bibr" rid="r22">Lakens, 2019</xref>; <xref ref-type="bibr" rid="r27">Mayo, 2018</xref>). This quantity appeals for its clarity of interpretation, but it is extremely difficult to calculate in practice, because it depends on numerous factors whose impact is difficult to determine, including those covered by <italic>FAF</italic>. Such factors involve bias in analysis — for example, due to unconsidered correlations between observations (see Item 3.2: “<italic>Among all the assumptions in the statistical test or model, are there any that are clearly inaccurate and favour the claimed hypothesis?</italic>”). However, bias is highly context-dependent, and quantifying its extent requires a profound understanding of the mechanisms that produced a particular dataset, like the true magnitude of unconsidered correlations or features of measurement, selection, and unaccounted-for confounders (<xref ref-type="bibr" rid="r11">Greenland, 2005</xref>).</p>
<p>Nevertheless, anchoring severity in the extremely simple cases in which computing it is feasible helps to get an idea of its wider range in more complex, realistic scenarios. <xref ref-type="bibr" rid="r27">Mayo (2018)</xref> has elaborated extensively on severity calculations under the unrealistic assumptions of the absence of any bias (<xref ref-type="bibr" rid="r11">Greenland, 2005</xref>) and of questionable research practices such as <italic>p</italic>-hacking and HARKing. In this case, and if a claim on a hypothesis is based on a single (frequentist) statistical test, severity is as follows:</p>
<list id="L3" list-type="bullet">
<list-item>
<p>For a statistically significant test result (<italic>p</italic> &lt; α) in favour of, for example, claiming an effect, severity equals 1−α.</p></list-item>
<list-item>
<p>For a nonsignificant test result in favour of no effect (<italic>p</italic> ≥ α), severity is 1−β, where β is actually a (monotonically growing) function of the unknown true effect size.</p></list-item>
</list>
<p>If the claim relies on multiple tests — as addressed in <italic>FAF</italic> Item 3.4 — and the decision rule requires that <italic>at least one</italic> out of k tests be passed (Item 3.3), severity <italic>decreases</italic>. If the claim is in favour of an effect, severity ranges between (1 − k·α), assuming independent tests, and (1 − α), assuming completely correlated tests. If the claim is against the effect, severity ranges between (1 − k·β) and (1 − β). Conversely, if the decision rule requires that <italic>all tests</italic> be passed, severity is <italic>higher</italic>. In that case, severity in favour of the effect ranges between (1 − α)<sup>k</sup> and (1 − α), depending on the degree of dependence between tests. The analogous range for claims against the effect is between (1 − β)<sup>k</sup> and (1 − β).</p>
<p>Questionable practices reduce severity, and the more ambiguities and flexibilities remain undisclosed (i.e., the more potential issues <italic>FAF</italic> identifies), the more room there is for severity to fall below the above values (<xref ref-type="bibr" rid="r22">Lakens, 2019</xref>). In cases of extensive fishing through flexible hypothesis formulation, data processing or analysis, severity can even approach 0 (<xref ref-type="bibr" rid="r13">Head et al., 2015</xref>; <xref ref-type="bibr" rid="r38">Simmons et al., 2011</xref>). Such a claim is essentially unfalsifiable. When a potential issue is identified by endorsing a <italic>FAF</italic> item — for example, unclear wording of the hypothesis (Item 1.2) or a lack of preregistration or insufficient detail in it (domain on transparency measures) — we do not know which questionable research practices, if any, have been employed, or to what extent. These unknowns determine the degree to which severity is reduced.</p>
<p>While these considerations apply only to specific situations and yield broad severity ranges, we strongly encourage mathematical elaborations of severity in somewhat more complex scenarios, particularly regarding the impact of issues addressed by specific <italic>FAF</italic> items.</p></sec>
<sec><title>Strengths and Limitations</title>
<p>A key strength of <italic>FAF</italic> is its foundation in Popper’s principle of falsifiability, the most widely accepted scientific standard for hypothesis testing. <italic>FAF’s</italic> qualitative approach highlights potential issues without making direct judgments about the validity of a claim. This reduces the risk of arbitrary or overly context-dependent evaluations. Furthermore, since falsifiability is a universal principle of scientific inquiry, <italic>FAF</italic> provides wide usability, allowing researchers to apply it without adjustments in different contexts and across disciplines. Its areas of usage include manuscript preparation, peer review, and post-publication evaluation. While many existing approaches are restricted to post hoc evaluation of compliance with certain reporting standards or methodological benchmarks, <italic>FAF</italic>, when used already at the planning stage, promotes better research design from the outset and encourages researchers to formulate hypotheses that can be meaningfully tested and potentially refuted. Since <italic>FAF</italic> was developed through a consensus among the authors and refined via a collaborative feedback process involving 19 experts from diverse backgrounds, major weaknesses and inaccuracies in the form should have been identified. The result aims to strike a balance: while it does not identify every possible issue, it captures many and remains concise and accessible to a broad range of researchers.</p>
<p>Despite these strengths, <italic>FAF</italic> has limitations. It is specifically designed for original, quantitative research and does not apply to meta-analyses, re-analyses, or other study types. Assessing falsifiability in such research requires other approaches, though elements of <italic>FAF</italic> may still be useful. For example, in meta-analysis, assessing reproducibility is a major challenge (<xref ref-type="bibr" rid="r26">Maassen et al., 2020</xref>), yet flexibilities in the research process must still be disclosed (e.g., predefining the hypothesis, data processing, and analysis before accessing the data). To illustrate <italic>FAF</italic> items, we used examples from the behavioural sciences because they should be familiar to everyone. However, we would like to emphasise that the same items apply to data-intensive research such as neuroimaging or ecological momentary assessment. These fields are not exempt from falsifiability standards simply because they pose greater practical challenges.</p>
<p>As a second limitation, the effectiveness of <italic>FAF</italic> depends on researchers’ willingness to engage critically with the identified issues. While the form encourages users to document and reflect on potential ambiguities, there is no mechanism to enforce proper resolutions of these concerns. <italic>FAF</italic> includes a section on transparency measures, but this only addresses whether the reported procedures <italic>could</italic> be verified, not whether they have actually been verified.</p>
	<p>Finally, while the expert feedback process contributed valuable insights, the relatively small number of responses (<italic>n</italic> = 19) means that the revisions to the form were based on a limited set of perspectives. Therefore, the version presented in this paper represents only the initial release — Version 1.0 — of <italic>FAF</italic>. We invite the broader scientific community to contribute to its further development. To facilitate this, we have assigned it a Creative Commons Attribution license (CC BY) and created a GitHub repository (<xref ref-type="bibr" rid="r18">Höfler, Kräplin, Elsherif et al., 2025</xref>), which will allow for the collection of community feedback that can guide future refinements. Further systematic evaluation should aim to achieve broader validation, including large-scale testing across different disciplines. Additionally, interrater agreement should be examined to identify which items — and in which scientific contexts — researchers are most likely to disagree. Such insights could inform improvements to <italic>FAF</italic>’s utility and generalizability.</p></sec>
<sec sec-type="conclusions"><title>Conclusion</title>
<p><italic>FAF</italic> is beneficial because it avoids the heuristic approach to complexity and bias of multi-criteria assessments by focusing on a single, fundamental principle — falsifiability. This makes it broadly applicable and theoretically grounded, while also encouraging more rigorous and less biased research practices. Possible future extensions include training artificial intelligence to automatically extract the relevant information from papers. AI-based large-scale meta-science could analyse thousands of publications to identify not only research fields with particularly high numbers of “not even wrong” hypotheses (<xref ref-type="bibr" rid="r37">Scheel, 2022</xref>), but also steps in the research process with particularly frequently undisclosed flexibilities. These insights could then inform the development of targeted research tools and teaching approaches to close the flexibilities. Ultimately, this could encourage the risk-taking that researchers need for conducting studies capable of providing robust evidence.</p></sec></sec>
</body>
<back>
<ref-list><title>References</title>
<ref id="r1"><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Aczel</surname>, <given-names>B.</given-names></string-name>, <string-name name-style="western"><surname>Szaszi</surname>, <given-names>B.</given-names></string-name>, <string-name name-style="western"><surname>Sarafoglou</surname>, <given-names>A.</given-names></string-name>, <string-name name-style="western"><surname>Kekecs</surname>, <given-names>Z.</given-names></string-name>, <string-name name-style="western"><surname>Kucharský</surname>, <given-names>Š.</given-names></string-name>, <string-name name-style="western"><surname>Benjamin</surname>, <given-names>D.</given-names></string-name>, <string-name name-style="western"><surname>Chambers</surname>, <given-names>C. D.</given-names></string-name>, <string-name name-style="western"><surname>Fisher</surname>, <given-names>A.</given-names></string-name>, <string-name name-style="western"><surname>Gelman</surname>, <given-names>A.</given-names></string-name>, <string-name name-style="western"><surname>Gernsbacher</surname>, <given-names>M. A.</given-names></string-name>, <string-name name-style="western"><surname>Ioannidis</surname>, <given-names>J. P.</given-names></string-name>, <string-name name-style="western"><surname>Johnson</surname>, <given-names>E.</given-names></string-name>, <string-name name-style="western"><surname>Jonas</surname>, <given-names>K.</given-names></string-name>, <string-name name-style="western"><surname>Kousta</surname>, <given-names>S.</given-names></string-name>, <string-name name-style="western"><surname>Lilienfeld</surname>, <given-names>S. O.</given-names></string-name>, <string-name name-style="western"><surname>Lindsay</surname>, <given-names>D. S.</given-names></string-name>, <string-name name-style="western"><surname>Morey</surname>, <given-names>C. 
C.</given-names></string-name>, <string-name name-style="western"><surname>Munafò</surname>, <given-names>M.</given-names></string-name>, <string-name name-style="western"><surname>Newell</surname>, <given-names>B. R.</given-names></string-name>, <etal>. . .</etal> <string-name name-style="western"><surname>Wagenmakers</surname>, <given-names>E.-J.</given-names></string-name></person-group> (<year>2021</year>). <article-title>A consensus-based transparency checklist.</article-title> <source>Nature Human Behaviour</source>, <volume>4</volume>, <fpage>4</fpage>–<lpage>6</lpage>. <pub-id pub-id-type="doi">10.1038/s41562-019-0772-6</pub-id></mixed-citation></ref>
<ref id="r2"><mixed-citation publication-type="preprint"><person-group person-group-type="author"><string-name name-style="western"><surname>Alipourfard</surname>, <given-names>N.</given-names></string-name>, <string-name name-style="western"><surname>Arendt</surname>, <given-names>B.</given-names></string-name>, <string-name name-style="western"><surname>Benjamin</surname>, <given-names>D. M.</given-names></string-name>, <string-name name-style="western"><surname>Benkler</surname>, <given-names>N.</given-names></string-name>, <string-name name-style="western"><surname>Bishop</surname>, <given-names>M. M.</given-names></string-name>, <string-name name-style="western"><surname>Burstein</surname>, <given-names>M.</given-names></string-name>, <string-name name-style="western"><surname>Bush</surname>, <given-names>M.</given-names></string-name>, <string-name name-style="western"><surname>Caverlee</surname>, <given-names>J.</given-names></string-name>, <string-name name-style="western"><surname>Chen</surname>, <given-names>Y.</given-names></string-name>, <string-name name-style="western"><surname>Clark</surname>, <given-names>C.</given-names></string-name>, <string-name name-style="western"><surname>Dreber Almenberg</surname>, <given-names>A.</given-names></string-name>, <string-name name-style="western"><surname>Errington</surname>, <given-names>T. 
M.</given-names></string-name>, <string-name name-style="western"><surname>Fidler</surname>, <given-names>F.</given-names></string-name>, <string-name name-style="western"><surname>Field</surname>, <given-names>S.</given-names></string-name>, <string-name name-style="western"><surname>Fox</surname>, <given-names>N.</given-names></string-name>, <string-name name-style="western"><surname>Frank</surname>, <given-names>A.</given-names></string-name>, <string-name name-style="western"><surname>Fraser</surname>, <given-names>H.</given-names></string-name>, <string-name name-style="western"><surname>Friedman</surname>, <given-names>S.</given-names></string-name>, <string-name name-style="western"><surname>Gelman</surname>, <given-names>B.</given-names></string-name>, <etal>. . .</etal> <string-name name-style="western"><surname>Wu</surname>, <given-names>J.</given-names></string-name></person-group> (<year>2021</year>, May 4). <article-title>Systematizing confidence in open research and evidence (SCORE).</article-title> <source>OSF Preprints</source>. <pub-id pub-id-type="doi">10.31235/osf.io/46mnb</pub-id></mixed-citation></ref>
<ref id="r3"><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Appelbaum</surname>, <given-names>M.</given-names></string-name>, <string-name name-style="western"><surname>Cooper</surname>, <given-names>H.</given-names></string-name>, <string-name name-style="western"><surname>Kline</surname>, <given-names>R. B.</given-names></string-name>, <string-name name-style="western"><surname>Mayo-Wilson</surname>, <given-names>E.</given-names></string-name>, <string-name name-style="western"><surname>Nezu</surname>, <given-names>A. M.</given-names></string-name>, &amp; <string-name name-style="western"><surname>Rao</surname>, <given-names>S. M.</given-names></string-name></person-group> (<year>2018</year>). <article-title>Journal article reporting standards for quantitative research in psychology: The APA Publications and Communications Board Task Force Report.</article-title> <source>American Psychologist</source>, <volume>73</volume>(<issue>1</issue>), <fpage>3</fpage>–<lpage>25</lpage>. <pub-id pub-id-type="doi">10.1037/amp0000191</pub-id><pub-id pub-id-type="pmid">29345484</pub-id></mixed-citation></ref>
<ref id="r4"><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Bender</surname>, <given-names>R.</given-names></string-name>, &amp; <string-name name-style="western"><surname>Lange</surname>, <given-names>S.</given-names></string-name></person-group> (<year>2001</year>). <article-title>Adjusting for multiple testing — When and how?</article-title> <source>Journal of Clinical Epidemiology</source>, <volume>54</volume>(<issue>4</issue>), <fpage>343</fpage>–<lpage>349</lpage>. <pub-id pub-id-type="doi">10.1016/S0895-4356(00)00314-0</pub-id><pub-id pub-id-type="pmid">11297884</pub-id></mixed-citation></ref>
<ref id="r5"><mixed-citation publication-type="web">Devezer, B., &amp; Buzbas, E. O. (2021). <italic>Minimum viable experiment to replicate</italic> [Preprint]. PhilSci Archive. <ext-link ext-link-type="uri" xlink:href="https://philsci-archive.pitt.edu/24720/7/Minimum_Viable_Experiment_to_Replicate_preprint.pdf">https://philsci-archive.pitt.edu/24720/7/Minimum_Viable_Experiment_to_Replicate_preprint.pdf</ext-link></mixed-citation></ref>
<ref id="r6"><mixed-citation publication-type="web">FAF Research Group. (2025). <italic>Falsification Assessment Form, Version 1.0</italic>. [Spreadsheet] <ext-link ext-link-type="uri" xlink:href="https://docs.google.com/spreadsheets/d/1a1pQ-jQYcBDAZ4p8Tpkq27NYl9p_bxvj-v_3MYq1nOA/copy">https://docs.google.com/spreadsheets/d/1a1pQ-jQYcBDAZ4p8Tpkq27NYl9p_bxvj-v_3MYq1nOA/copy</ext-link></mixed-citation></ref>
<ref id="r7"><mixed-citation publication-type="book">Feyerabend, P. (1993). <italic>Against method</italic> (3<sup>rd</sup> ed.). Verso.</mixed-citation></ref>
<ref id="r8"><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Fraser</surname>, <given-names>H.</given-names></string-name>, <string-name name-style="western"><surname>Bush</surname>, <given-names>M.</given-names></string-name>, <string-name name-style="western"><surname>Wintle</surname>, <given-names>B. C.</given-names></string-name>, <string-name name-style="western"><surname>Mody</surname>, <given-names>F.</given-names></string-name>, <string-name name-style="western"><surname>Smith</surname>, <given-names>E. T.</given-names></string-name>, <string-name name-style="western"><surname>Hanea</surname>, <given-names>A. M.</given-names></string-name>, <string-name name-style="western"><surname>Gould</surname>, <given-names>E.</given-names></string-name>, <string-name name-style="western"><surname>Hemming</surname>, <given-names>V.</given-names></string-name>, <string-name name-style="western"><surname>Hamilton</surname>, <given-names>D. G.</given-names></string-name>, &amp; <string-name name-style="western"><surname>Rumpff</surname>, <given-names>L.</given-names></string-name></person-group> (<year>2023</year>). <article-title>Predicting reliability through structured expert elicitation with the repliCATS (Collaborative Assessments for Trustworthy Science) process.</article-title> <source>PLoS One</source>, <volume>18</volume>(<issue>1</issue>), <elocation-id>e0274429</elocation-id>. <pub-id pub-id-type="doi">10.1371/journal.pone.0274429</pub-id><pub-id pub-id-type="pmid">36701303</pub-id></mixed-citation></ref>
<ref id="r9"><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Gigerenzer</surname>, <given-names>G.</given-names></string-name></person-group> (<year>2004</year>). <article-title>Mindless statistics.</article-title> <source>Journal of Socio-Economics</source>, <volume>33</volume>(<issue>5</issue>), <fpage>587</fpage>–<lpage>606</lpage>. <pub-id pub-id-type="doi">10.1016/j.socec.2004.09.033</pub-id></mixed-citation></ref>
<ref id="r10"><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Gigerenzer</surname>, <given-names>G.</given-names></string-name>, &amp; <string-name name-style="western"><surname>Marewski</surname>, <given-names>J. N.</given-names></string-name></person-group> (<year>2015</year>). <article-title>Surrogate science: The idol of a universal method for scientific inference.</article-title> <source>Journal of Management</source>, <volume>41</volume>(<issue>2</issue>), <fpage>421</fpage>–<lpage>440</lpage>. <pub-id pub-id-type="doi">10.1177/0149206314547522</pub-id></mixed-citation></ref>
<ref id="r11"><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Greenland</surname>, <given-names>S.</given-names></string-name></person-group> (<year>2005</year>). <article-title>Multiple-bias modeling for analysis of observational data.</article-title> <source>Journal of the Royal Statistical Society. Series A (Statistics in Society)</source>, <volume>168</volume>(<issue>2</issue>), <fpage>267</fpage>–<lpage>306</lpage>. <pub-id pub-id-type="doi">10.1111/j.1467-985X.2004.00349.x</pub-id></mixed-citation></ref>
<ref id="r12"><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Greenland</surname>, <given-names>S.</given-names></string-name>, &amp; <string-name name-style="western"><surname>O’Rourke</surname>, <given-names>K.</given-names></string-name></person-group> (<year>2001</year>). <article-title>On the bias produced by quality scores in meta‐analysis, and a hierarchical view of proposed solutions.</article-title> <source>Biostatistics</source>, <volume>2</volume>(<issue>4</issue>), <fpage>463</fpage>–<lpage>471</lpage>. <pub-id pub-id-type="doi">10.1093/biostatistics/2.4.463</pub-id><pub-id pub-id-type="pmid">12933636</pub-id></mixed-citation></ref>
	<ref id="r13"><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Head</surname>, <given-names>M. L.</given-names></string-name>, <string-name name-style="western"><surname>Holman</surname>, <given-names>L.</given-names></string-name>, <string-name name-style="western"><surname>Lanfear</surname>, <given-names>R.</given-names></string-name>, <string-name name-style="western"><surname>Kahn</surname>, <given-names>A. T.</given-names></string-name>, &amp; <string-name name-style="western"><surname>Jennions</surname>, <given-names>M. D.</given-names></string-name></person-group> (<year>2015</year>). <article-title>The extent and consequences of <italic>p</italic>-hacking in science.</article-title> <source>PLoS Biology</source>, <volume>13</volume>(<issue>3</issue>), <elocation-id>e1002106</elocation-id>. <pub-id pub-id-type="doi">10.1371/journal.pbio.1002106</pub-id><pub-id pub-id-type="pmid">25768323</pub-id></mixed-citation></ref>
<ref id="r14"><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Herbison</surname>, <given-names>P.</given-names></string-name>, <string-name name-style="western"><surname>Hay-Smith</surname>, <given-names>J.</given-names></string-name>, &amp; <string-name name-style="western"><surname>Gillespie</surname>, <given-names>W. J.</given-names></string-name></person-group> (<year>2006</year>). <article-title>Adjustment of meta-analyses on the basis of quality scores should be abandoned.</article-title> <source>Journal of Clinical Epidemiology</source>, <volume>59</volume>(<issue>12</issue>), <fpage>1249.e1</fpage>–<lpage>1249.e11</lpage>. <pub-id pub-id-type="doi">10.1016/j.jclinepi.2006.03.008</pub-id><pub-id pub-id-type="pmid">17098567</pub-id></mixed-citation></ref>
	<ref id="r15"><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Héroux</surname>, <given-names>M. E.</given-names></string-name>, <string-name name-style="western"><surname>Butler</surname>, <given-names>A. A.</given-names></string-name>, <string-name name-style="western"><surname>Cashin</surname>, <given-names>A. G.</given-names></string-name>, <string-name name-style="western"><surname>McCaughey</surname>, <given-names>E. J.</given-names></string-name>, <string-name name-style="western"><surname>Affleck</surname>, <given-names>A. J.</given-names></string-name>, <string-name name-style="western"><surname>Green</surname>, <given-names>M. A.</given-names></string-name>, <string-name name-style="western"><surname>Cartwright</surname>, <given-names>A.</given-names></string-name>, <string-name name-style="western"><surname>Jones</surname>, <given-names>M.</given-names></string-name>, <string-name name-style="western"><surname>Kiely</surname>, <given-names>K. M.</given-names></string-name>, <string-name name-style="western"><surname>van Schooten</surname>, <given-names>K. S.</given-names></string-name>, <string-name name-style="western"><surname>Menant</surname>, <given-names>J. C.</given-names></string-name>, <string-name name-style="western"><surname>Wewege</surname>, <given-names>M.</given-names></string-name>, &amp; <string-name name-style="western"><surname>Gandevia</surname>, <given-names>S. C.</given-names></string-name></person-group> (<year>2022</year>). <article-title>Quality Output Checklist and Content Assessment (QuOCCA): A new tool for assessing research quality and reproducibility.</article-title> <source>BMJ Open</source>, <volume>12</volume>(<issue>9</issue>), <elocation-id>e060976</elocation-id>. <pub-id pub-id-type="doi">10.1136/bmjopen-2022-060976</pub-id><pub-id pub-id-type="pmid">36167369</pub-id></mixed-citation></ref>
<ref id="r16"><mixed-citation publication-type="web">Höfler, M. (2023). <italic>A form to assess severe testing of a paper’s claims (Version 1)</italic> [Preregistration]. Open Science Framework. <ext-link ext-link-type="uri" xlink:href="https://osf.io/c8j4w/registrations">https://osf.io/c8j4w/registrations</ext-link></mixed-citation></ref>
<ref id="r17"><mixed-citation publication-type="web">Höfler, M., Kräplin, A., Varga, M. A., Wallrich, L., Elsherif, M., Peikert, A., Seetahul, Y., Sætrevik, B., &amp; Montefinese, M. (2025). <italic>A falsification assessment form</italic> [FAF development materials and version history]. Open Science Framework. <ext-link ext-link-type="uri" xlink:href="https://osf.io/c8j4w">https://osf.io/c8j4w</ext-link></mixed-citation></ref>
<ref id="r18"><mixed-citation publication-type="web">Höfler, M., Kräplin, A., Elsherif, M. M., Schepke, M., Montefinese, M., Seetahul, Y., Sætrevik, B., Peikert, A., Varga, M. A., &amp; Wallrich, L. (2025). <italic>Falsification Assessment Form</italic> [Questionnaire]. GitHub. <ext-link ext-link-type="uri" xlink:href="https://github.com/FalsificationAssessmentForm">https://github.com/FalsificationAssessmentForm</ext-link></mixed-citation></ref>
<ref id="r19"><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Höfler</surname>, <given-names>M.</given-names></string-name>, <string-name name-style="western"><surname>Scherbaum</surname>, <given-names>S.</given-names></string-name>, <string-name name-style="western"><surname>Kanske</surname>, <given-names>P.</given-names></string-name>, <string-name name-style="western"><surname>McDonald</surname>, <given-names>B.</given-names></string-name>, &amp; <string-name name-style="western"><surname>Miller</surname>, <given-names>R.</given-names></string-name></person-group> (<year>2022</year>). <article-title>Means to valuable exploration: I. The blending of confirmation and exploration and how to resolve it.</article-title> <source>Meta-Psychology</source>, <volume>6</volume>. <pub-id pub-id-type="doi">10.15626/MP.2021.2837</pub-id></mixed-citation></ref>
<ref id="r20"><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Ioannidis</surname>, <given-names>J. P. A.</given-names></string-name></person-group> (<year>2012</year>). <article-title>Why science is not necessarily self-correcting.</article-title> <source>Perspectives on Psychological Science</source>, <volume>7</volume>(<issue>6</issue>), <fpage>645</fpage>–<lpage>654</lpage>. <pub-id pub-id-type="doi">10.1177/1745691612464056</pub-id><pub-id pub-id-type="pmid">26168125</pub-id></mixed-citation></ref>
<ref id="r21"><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Kerschbaumer</surname>, <given-names>S.</given-names></string-name>, <string-name name-style="western"><surname>Voracek</surname>, <given-names>M.</given-names></string-name>, <string-name name-style="western"><surname>Aczél</surname>, <given-names>B.</given-names></string-name>, <string-name name-style="western"><surname>Anderson</surname>, <given-names>S. F.</given-names></string-name>, <string-name name-style="western"><surname>Booth</surname>, <given-names>B. M.</given-names></string-name>, <string-name name-style="western"><surname>Buchanan</surname>, <given-names>E. M.</given-names></string-name>, <string-name name-style="western"><surname>Carlsson</surname>, <given-names>R.</given-names></string-name>, <string-name name-style="western"><surname>Heck</surname>, <given-names>D. W.</given-names></string-name>, <string-name name-style="western"><surname>Hiekkaranta</surname>, <given-names>A. P.</given-names></string-name>, <string-name name-style="western"><surname>Hoekstra</surname>, <given-names>R.</given-names></string-name>, <string-name name-style="western"><surname>Karch</surname>, <given-names>J. D.</given-names></string-name>, <string-name name-style="western"><surname>Lafit</surname>, <given-names>G.</given-names></string-name>, <string-name name-style="western"><surname>Lin</surname>, <given-names>Z.</given-names></string-name>, <string-name name-style="western"><surname>Liu</surname>, <given-names>S.</given-names></string-name>, <string-name name-style="western"><surname>MacKinnon</surname>, <given-names>D. P.</given-names></string-name>, <string-name name-style="western"><surname>McGorray</surname>, <given-names>E. 
L.</given-names></string-name>, <string-name name-style="western"><surname>Moreau</surname>, <given-names>D.</given-names></string-name>, <string-name name-style="western"><surname>Papadatou-Pastou</surname>, <given-names>M.</given-names></string-name>, <string-name name-style="western"><surname>Paterson</surname>, <given-names>H.</given-names></string-name>, <etal>. . .</etal> <string-name name-style="western"><surname>Tran</surname>, <given-names>U. S.</given-names></string-name></person-group> (<year>2025</year>). <article-title>VALID: A checklist-based approach for improving validity in psychological research.</article-title> <source>Advances in Methods and Practices in Psychological Science</source>, <volume>8</volume>(<issue>1</issue>). <pub-id pub-id-type="doi">10.1177/25152459241306432</pub-id></mixed-citation></ref>
<ref id="r22"><mixed-citation publication-type="preprint"><person-group person-group-type="author"><string-name name-style="western"><surname>Lakens</surname>, <given-names>D.</given-names></string-name></person-group> (<year>2019</year>). <article-title><italic>The value of preregistration for psychological science: A conceptual analysis</italic>.</article-title> <source>PsyArXiv Preprints</source>. <pub-id pub-id-type="doi">10.31234/osf.io/jbh4w</pub-id></mixed-citation></ref>
<ref id="r23"><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Lakens</surname>, <given-names>D.</given-names></string-name>, &amp; <string-name name-style="western"><surname>DeBruine</surname>, <given-names>L. M.</given-names></string-name></person-group> (<year>2021</year>). <article-title>Improving transparency, falsifiability, and rigor by making hypothesis tests machine-readable.</article-title> <source>Advances in Methods and Practices in Psychological Science</source>, <volume>4</volume>(<issue>2</issue>). <pub-id pub-id-type="doi">10.1177/2515245920970949</pub-id></mixed-citation></ref>
<ref id="r24"><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Lakens</surname>, <given-names>D.</given-names></string-name>, &amp; <string-name name-style="western"><surname>Mesquida</surname>, <given-names>C.</given-names></string-name></person-group> (<year>2024</year>). <article-title>The benefits of preregistration and Registered Reports.</article-title> <source>Evidence-Based Toxicology</source>, <volume>2</volume>(<issue>1</issue>). <pub-id pub-id-type="doi">10.1080/2833373X.2024.2376046</pub-id></mixed-citation></ref>
<ref id="r25"><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Loenneker</surname>, <given-names>H. D.</given-names></string-name>, <string-name name-style="western"><surname>Buchanan</surname>, <given-names>E. M.</given-names></string-name>, <string-name name-style="western"><surname>Martinovici</surname>, <given-names>A.</given-names></string-name>, <string-name name-style="western"><surname>Primbs</surname>, <given-names>M.</given-names></string-name>, <string-name name-style="western"><surname>Elsherif</surname>, <given-names>M. M.</given-names></string-name>, <string-name name-style="western"><surname>Baker</surname>, <given-names>B. J.</given-names></string-name>, <string-name name-style="western"><surname>Dudda</surname>, <given-names>L.</given-names></string-name>, <string-name name-style="western"><surname>Durdevic</surname>, <given-names>D. F.</given-names></string-name>, <string-name name-style="western"><surname>Misic</surname>, <given-names>K.</given-names></string-name>, <string-name name-style="western"><surname>Peetz</surname>, <given-names>H. K.</given-names></string-name>, <string-name name-style="western"><surname>Roer</surname>, <given-names>J. P.</given-names></string-name>, <string-name name-style="western"><surname>Schulze</surname>, <given-names>L.</given-names></string-name>, <string-name name-style="western"><surname>Wagner</surname>, <given-names>L.</given-names></string-name>, <string-name name-style="western"><surname>Wolska</surname>, <given-names>J.</given-names></string-name>, <string-name name-style="western"><surname>Kuhrt</surname>, <given-names>C.</given-names></string-name>, &amp; <string-name name-style="western"><surname>Pronizius</surname>, <given-names>E.</given-names></string-name></person-group> (<year>2024</year>). <article-title>We don’t know what you did last summer. 
On the importance of transparent reporting of reaction time data pre-processing.</article-title> <source>Cortex</source>, <volume>172</volume>, <fpage>14</fpage>–<lpage>37</lpage>. <pub-id pub-id-type="doi">10.1016/j.cortex.2023.11.012</pub-id><pub-id pub-id-type="pmid">38154375</pub-id></mixed-citation></ref>
	<ref id="r26"><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Maassen</surname>, <given-names>E.</given-names></string-name>, <string-name name-style="western"><surname>van Assen</surname>, <given-names>M. A. L. M.</given-names></string-name>, <string-name name-style="western"><surname>Nuijten</surname>, <given-names>M. B.</given-names></string-name>, <string-name name-style="western"><surname>Olsson-Collentine</surname>, <given-names>A.</given-names></string-name>, &amp; <string-name name-style="western"><surname>Wicherts</surname>, <given-names>J. M.</given-names></string-name></person-group> (<year>2020</year>). <article-title>Reproducibility of individual effect sizes in meta-analyses in psychology.</article-title> <source>PLoS ONE</source>, <volume>15</volume>(<issue>5</issue>), <elocation-id>e0233107</elocation-id>. <pub-id pub-id-type="doi">10.1371/journal.pone.0233107</pub-id></mixed-citation></ref>
<ref id="r27"><mixed-citation publication-type="book">Mayo, D. G. (2018). <italic>Statistical inference as severe testing: How to get beyond the statistics wars</italic>. Cambridge University Press.</mixed-citation></ref>
<ref id="r28"><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Meehl</surname>, <given-names>P. E.</given-names></string-name></person-group> (<year>1967</year>). <article-title>Theory-testing in psychology and physics: A methodological paradox.</article-title> <source>Philosophy of Science</source>, <volume>34</volume>(<issue>2</issue>), <fpage>103</fpage>–<lpage>115</lpage>. <pub-id pub-id-type="doi">10.1086/288135</pub-id></mixed-citation></ref>
<ref id="r29"><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Meehl</surname>, <given-names>P. E.</given-names></string-name></person-group> (<year>1978</year>). <article-title>Theoretical risks and tabular asterisks: Sir Karl, Sir Ronald, and the slow progress of soft psychology.</article-title> <source>Journal of Consulting and Clinical Psychology</source>, <volume>46</volume>, <fpage>806</fpage>–<lpage>834</lpage>. <pub-id pub-id-type="doi">10.1037/0022-006X.46.4.806</pub-id></mixed-citation></ref>
<ref id="r30"><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Nagy</surname>, <given-names>T.</given-names></string-name>, <string-name name-style="western"><surname>Hergert</surname>, <given-names>J.</given-names></string-name>, <string-name name-style="western"><surname>Elsherif</surname>, <given-names>M.</given-names></string-name>, <string-name name-style="western"><surname>Wallrich</surname>, <given-names>L.</given-names></string-name>, <string-name name-style="western"><surname>Schmidt</surname>, <given-names>K.</given-names></string-name>, <string-name name-style="western"><surname>Waltzer</surname>, <given-names>T.</given-names></string-name>, <string-name name-style="western"><surname>Payne</surname>, <given-names>J. W.</given-names></string-name>, <string-name name-style="western"><surname>Gjoneska</surname>, <given-names>B.</given-names></string-name>, <string-name name-style="western"><surname>Seetahul</surname>, <given-names>Y.</given-names></string-name>, <string-name name-style="western"><surname>Wang</surname>, <given-names>Y. A.</given-names></string-name>, <string-name name-style="western"><surname>Scharfenberg</surname>, <given-names>D.</given-names></string-name>, <string-name name-style="western"><surname>Tyson</surname>, <given-names>G.</given-names></string-name>, <string-name name-style="western"><surname>Yang</surname>, <given-names>Y.-F.</given-names></string-name>, <string-name name-style="western"><surname>Skvortsova</surname>, <given-names>A.</given-names></string-name>, <string-name name-style="western"><surname>Alarie</surname>, <given-names>S.</given-names></string-name>, <string-name name-style="western"><surname>Graves</surname>, <given-names>K. A.</given-names></string-name>, <string-name name-style="western"><surname>Sotola</surname>, <given-names>L. 
K.</given-names></string-name>, <string-name name-style="western"><surname>Moreau</surname>, <given-names>D.</given-names></string-name>, &amp; <string-name name-style="western"><surname>Rubínová</surname>, <given-names>E.</given-names></string-name></person-group> (<year>2024</year>). <article-title>Bestiary of questionable research practices in psychology.</article-title> <source>Advances in Methods and Practices in Psychological Science</source>, <volume>8</volume>(<issue>3</issue>). <pub-id pub-id-type="doi">10.1177/25152459251348431</pub-id></mixed-citation></ref>
<ref id="r31"><mixed-citation publication-type="web">Nanyang Technological University Library. (2023). <italic>Open research checklist (Version 1)</italic>. Nanyang Technological University. <ext-link ext-link-type="uri" xlink:href="https://libguides.ntu.edu.sg/openresearchchecklist">https://libguides.ntu.edu.sg/openresearchchecklist</ext-link></mixed-citation></ref>
<ref id="r32"><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Nosek</surname>, <given-names>B. A.</given-names></string-name>, <string-name name-style="western"><surname>Alter</surname>, <given-names>G.</given-names></string-name>, <string-name name-style="western"><surname>Banks</surname>, <given-names>G.</given-names></string-name>, <string-name name-style="western"><surname>Borsboom</surname>, <given-names>D.</given-names></string-name>, <string-name name-style="western"><surname>Bowman</surname>, <given-names>S. D.</given-names></string-name>, <string-name name-style="western"><surname>Breckler</surname>, <given-names>S. J.</given-names></string-name>, <string-name name-style="western"><surname>Buck</surname>, <given-names>S.</given-names></string-name>, <string-name name-style="western"><surname>Chambers</surname>, <given-names>C.</given-names></string-name>, <string-name name-style="western"><surname>Chin</surname>, <given-names>G.</given-names></string-name>, <string-name name-style="western"><surname>Christensen</surname>, <given-names>G.</given-names></string-name>, <string-name name-style="western"><surname>Dumas</surname>, <given-names>T.</given-names></string-name>, <string-name name-style="western"><surname>Ebersole</surname>, <given-names>C.</given-names></string-name>, <string-name name-style="western"><surname>Fidler</surname>, <given-names>F.</given-names></string-name>, <string-name name-style="western"><surname>Hauser</surname>, <given-names>D.</given-names></string-name>, <string-name name-style="western"><surname>Hennessy</surname>, <given-names>S.</given-names></string-name>, <string-name name-style="western"><surname>Hilgard</surname>, <given-names>J.</given-names></string-name>, <string-name name-style="western"><surname>Hogg</surname>, <given-names>M.</given-names></string-name>, <string-name name-style="western"><surname>Humphreys</surname>, 
<given-names>M.</given-names></string-name>, <string-name name-style="western"><surname>Kaatz</surname>, <given-names>A.</given-names></string-name>, &amp; <string-name name-style="western"><surname>Yarkoni</surname>, <given-names>T.</given-names></string-name></person-group> (<year>2015</year>). <article-title>Promoting an open research culture: Author guidelines for journals could help to promote transparency, openness, and reproducibility.</article-title> <source>Science</source>, <volume>348</volume>(<issue>6242</issue>), <fpage>1422</fpage>–<lpage>1425</lpage>. <pub-id pub-id-type="doi">10.1126/science.aab2374</pub-id><pub-id pub-id-type="pmid">26113702</pub-id></mixed-citation></ref>
<ref id="r33"><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Oberauer</surname>, <given-names>K.</given-names></string-name>, &amp; <string-name name-style="western"><surname>Lewandowsky</surname>, <given-names>S.</given-names></string-name></person-group> (<year>2019</year>). <article-title>Addressing the theory crisis in psychology.</article-title> <source>Psychonomic Bulletin &amp; Review</source>, <volume>26</volume>, <fpage>1596</fpage>–<lpage>1618</lpage>. <pub-id pub-id-type="doi">10.3758/s13423-019-01645-2</pub-id><pub-id pub-id-type="pmid">31515732</pub-id></mixed-citation></ref>
<ref id="r34"><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Parsons</surname>, <given-names>S.</given-names></string-name>, <string-name name-style="western"><surname>Azevedo</surname>, <given-names>F.</given-names></string-name>, <string-name name-style="western"><surname>Elsherif</surname>, <given-names>M. M.</given-names></string-name>, <string-name name-style="western"><surname>Guay</surname>, <given-names>S.</given-names></string-name>, <string-name name-style="western"><surname>Shahim</surname>, <given-names>O. N.</given-names></string-name>, <string-name name-style="western"><surname>Govaart</surname>, <given-names>G. H.</given-names></string-name>, <string-name name-style="western"><surname>Norris</surname>, <given-names>E.</given-names></string-name>, <string-name name-style="western"><surname>O’Mahony</surname>, <given-names>A.</given-names></string-name>, <string-name name-style="western"><surname>Parker</surname>, <given-names>A. J.</given-names></string-name>, <string-name name-style="western"><surname>Todorovic</surname>, <given-names>A.</given-names></string-name>, <string-name name-style="western"><surname>Pennington</surname>, <given-names>C. R.</given-names></string-name>, <string-name name-style="western"><surname>Garcia-Pelegrin</surname>, <given-names>E.</given-names></string-name>, <string-name name-style="western"><surname>Lazić</surname>, <given-names>A.</given-names></string-name>, <string-name name-style="western"><surname>Robertson</surname>, <given-names>O.</given-names></string-name>, <string-name name-style="western"><surname>Middleton</surname>, <given-names>S. 
L.</given-names></string-name>, <string-name name-style="western"><surname>Valentini</surname>, <given-names>B.</given-names></string-name>, <string-name name-style="western"><surname>McCuaig</surname>, <given-names>J.</given-names></string-name>, <string-name name-style="western"><surname>Baker</surname>, <given-names>B. J.</given-names></string-name>, <string-name name-style="western"><surname>Collins</surname>, <given-names>E.</given-names></string-name>, <etal>. . .</etal> <string-name name-style="western"><surname>Aczel</surname>, <given-names>B.</given-names></string-name></person-group> (<year>2022</year>). <article-title>A community-sourced glossary of open scholarship terms.</article-title> <source>Nature Human Behaviour</source>, <volume>6</volume>(<issue>3</issue>), <fpage>312</fpage>–<lpage>318</lpage>. <pub-id pub-id-type="doi">10.1038/s41562-021-01269-4</pub-id><pub-id pub-id-type="pmid">35190714</pub-id></mixed-citation></ref>
<ref id="r35"><mixed-citation publication-type="book">Popper, K. (1959). <italic>The logic of scientific discovery</italic>. Hutchinson.</mixed-citation></ref>
<ref id="r36"><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Rakover</surname>, <given-names>S. S.</given-names></string-name></person-group> (<year>2003</year>). <article-title>Experimental psychology and Duhem’s Problem.</article-title> <source>Journal for the Theory of Social Behaviour</source>, <volume>33</volume>(<issue>1</issue>), <fpage>45</fpage>–<lpage>66</lpage>. <pub-id pub-id-type="doi">10.1111/1468-5914.00205</pub-id></mixed-citation></ref>
	<ref id="r37"><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Scheel</surname>, <given-names>A. M.</given-names></string-name></person-group> (<year>2022</year>). <article-title>Why most psychological research findings are not even wrong.</article-title> <source>Infant and Child Development</source>, <volume>31</volume>(<issue>1</issue>), <elocation-id>e2295</elocation-id>. <pub-id pub-id-type="doi">10.1002/icd.2295</pub-id></mixed-citation></ref>
<ref id="r38"><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Simmons</surname>, <given-names>J. P.</given-names></string-name>, <string-name name-style="western"><surname>Nelson</surname>, <given-names>L. D.</given-names></string-name>, &amp; <string-name name-style="western"><surname>Simonsohn</surname>, <given-names>U.</given-names></string-name></person-group> (<year>2011</year>). <article-title>False-positive psychology: Undisclosed flexibility in data collection and analysis allows presenting anything as significant.</article-title> <source>Psychological Science</source>, <volume>22</volume>(<issue>11</issue>), <fpage>1359</fpage>–<lpage>1366</lpage>. <pub-id pub-id-type="doi">10.1177/0956797611417632</pub-id><pub-id pub-id-type="pmid">22006061</pub-id></mixed-citation></ref>
<ref id="r39"><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Uygun Tunç</surname>, <given-names>D.</given-names></string-name>, &amp; <string-name name-style="western"><surname>Tunç</surname>, <given-names>M. N.</given-names></string-name></person-group> (<year>2023</year>). <article-title>A falsificationist treatment of auxiliary hypotheses in social and behavioral sciences: Systematic replications framework.</article-title> <source>Meta-Psychology</source>, <volume>7</volume>. <pub-id pub-id-type="doi">10.15626/MP.2021.2756</pub-id></mixed-citation></ref>
<ref id="r40"><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Viswanathan</surname>, <given-names>M.</given-names></string-name>, <string-name name-style="western"><surname>Patnode</surname>, <given-names>C. D.</given-names></string-name>, <string-name name-style="western"><surname>Berkman</surname>, <given-names>N. D.</given-names></string-name>, <string-name name-style="western"><surname>Bass</surname>, <given-names>E. B.</given-names></string-name>, <string-name name-style="western"><surname>Chang</surname>, <given-names>S.</given-names></string-name>, <string-name name-style="western"><surname>Hartling</surname>, <given-names>L.</given-names></string-name>, <string-name name-style="western"><surname>Murad</surname>, <given-names>M. H.</given-names></string-name>, <string-name name-style="western"><surname>Treadwell</surname>, <given-names>J. R.</given-names></string-name>, &amp; <string-name name-style="western"><surname>Kane</surname>, <given-names>R. L.</given-names></string-name></person-group> (<year>2018</year>). <article-title>Recommendations for assessing the risk of bias in systematic reviews of health-care interventions.</article-title> <source>Journal of Clinical Epidemiology</source>, <volume>97</volume>, <fpage>26</fpage>–<lpage>34</lpage>. <pub-id pub-id-type="doi">10.1016/j.jclinepi.2017.12.004</pub-id><pub-id pub-id-type="pmid">29248724</pub-id></mixed-citation></ref>
	<ref id="r41"><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Wicherts</surname>, <given-names>J. M.</given-names></string-name>, <string-name name-style="western"><surname>Veldkamp</surname>, <given-names>C. L. S.</given-names></string-name>, <string-name name-style="western"><surname>Augusteijn</surname>, <given-names>H. E. M.</given-names></string-name>, <string-name name-style="western"><surname>Bakker</surname>, <given-names>M.</given-names></string-name>, <string-name name-style="western"><surname>van Aert</surname>, <given-names>R. C. M.</given-names></string-name>, &amp; <string-name name-style="western"><surname>van Assen</surname>, <given-names>M. A. L. M.</given-names></string-name></person-group> (<year>2016</year>). <article-title>Degrees of freedom in planning, running, analyzing, and reporting psychological studies: A checklist to avoid <italic>p</italic>-hacking.</article-title> <source>Frontiers in Psychology</source>, <volume>7</volume>, <elocation-id>1832</elocation-id>. <pub-id pub-id-type="doi">10.3389/fpsyg.2016.01832</pub-id><pub-id pub-id-type="pmid">27933012</pub-id></mixed-citation></ref>
	<ref id="r42"><mixed-citation publication-type="journal"><person-group person-group-type="author"><string-name name-style="western"><surname>Zhao</surname>, <given-names>N.</given-names></string-name>, <string-name name-style="western"><surname>Yang</surname>, <given-names>S.</given-names></string-name>, <string-name name-style="western"><surname>Zhang</surname>, <given-names>Q.</given-names></string-name>, <string-name name-style="western"><surname>Wang</surname>, <given-names>J.</given-names></string-name>, <string-name name-style="western"><surname>Xie</surname>, <given-names>W.</given-names></string-name>, <string-name name-style="western"><surname>Tan</surname>, <given-names>Y.</given-names></string-name>, &amp; <string-name name-style="western"><surname>Zhou</surname>, <given-names>T.</given-names></string-name></person-group> (<year>2024</year>). <article-title>School bullying results in poor psychological conditions: Evidence from a survey of 95,545 subjects.</article-title> <source>Frontiers in Psychology</source>, <volume>15</volume>, <elocation-id>1279872</elocation-id>. <pub-id pub-id-type="doi">10.3389/fpsyg.2024.1279872</pub-id><pub-id pub-id-type="pmid">38328372</pub-id></mixed-citation></ref>
</ref-list><ack><title>Acknowledgements</title>
<p>We thank the following colleagues for their hints and advice: Tao Coll-Martín, Lisa DeBruine, Simona Haasova, Rink Hoekstra, Aaron Peikert, Jill de Ron, Danilo Calero Sequeira, Gilad Feldman, Nicklas Hafiz, Alex Holcombe, Jürgen Hoyer, Amélie Gourdan Kanhukamwe, Daniël Lakens, Niclas Jacobs, Philipp Kanske, Robert Miller, Gerit Pfuhl, Merle-Marie Pittelkow, Priya Silverstein, Anna van ’t Veer.</p></ack>
	<sec sec-type="data-availability" id="das"><title>Data Availability</title>
		<p>The data collected for this study are not publicly available because participants did not consent to data sharing.</p>
	</sec>	

	<sec sec-type="supplementary-material" id="sp1"><title>Supplementary Materials</title>
		<table-wrap position="anchor" content-type="supplementary-materials">
			<table frame="void" style="background-#f3f3f3 nobreak">
				<col width="60%" align="left"/>
				<col width="40%" align="left"/>
				<thead>
					<tr>
						<th>Type of supplementary material</th>
						<th>Availability/Access</th>
					</tr></thead>
				<tbody>
					<tr>
						<th colspan="2">Data</th>						
					</tr>
					<tr>
						<td>Data for this study are not publicly available.</td>
						<td>—</td>
					</tr>					
					<tr style="grey-border-top-dashed">
					<th colspan="2">Preregistration</th>						
					</tr>
					<tr><td>Preregistration for the study.</td>
						<td><xref ref-type="bibr" rid="r16">Höfler (2023)</xref></td>
					</tr>
					<tr style="grey-border-top-dashed">
						<th colspan="2">Code</th>
					</tr>
					<tr>
						<td>No code was provided for the study.</td>
						<td>—</td>
					</tr>	
					<tr style="grey-border-top-dashed">
						<th colspan="2">Material</th>
					</tr>
					<tr>
						<td>a) FAF development materials and version history.</td>
						<td><xref ref-type="bibr" rid="r17">Höfler, Kräplin, Varga et al. (2025)</xref></td>
					</tr>
					<tr>
						<td>b) Falsification Assessment Form (FAF) Questionnaire.</td>
						<td><xref ref-type="bibr" rid="r18">Höfler, Kräplin, Elsherif et al. (2025)</xref></td>
					</tr>
					</tbody>
			</table> </table-wrap>
	</sec>		

<fn-group>
<fn fn-type="financial-disclosure"><p>The authors have no funding to report.</p></fn>
</fn-group>
<fn-group>
<fn fn-type="conflict"><p>The authors have declared that no competing interests exist.</p></fn>
</fn-group>
</back>
</article>
