.MCAD 303010000 1 74 68 0 .CMD FORMAT rd=d ct=10 im=i et=3 zt=15 pr=3 mass length time charge temperature tr=0 vm=0 .CMD SET ORIGIN 0 .CMD SET TOL 0.001000000000000 .CMD SET PRNCOLWIDTH 8 .CMD SET PRNPRECISION 4 .CMD PRINT_SETUP 1.200000 0.979167 1.200000 1.200000 0 .CMD HEADER_FOOTER 1 1 *empty* *empty* *empty* 0 1 *empty* *empty* *empty* .CMD HEADER_FOOTER_FONT fontID=14 family=Arial points=10 bold=0 italic=0 underline=0 .CMD HEADER_FOOTER_FONT fontID=15 family=Arial points=10 bold=0 italic=0 underline=0 .CMD DEFINE_FONTSTYLE_NAME fontID=0 name=Variables .CMD DEFINE_FONTSTYLE_NAME fontID=1 name=Constants .CMD DEFINE_FONTSTYLE_NAME fontID=2 name=Text .CMD DEFINE_FONTSTYLE_NAME fontID=4 name=User^1 .CMD DEFINE_FONTSTYLE_NAME fontID=5 name=User^2 .CMD DEFINE_FONTSTYLE_NAME fontID=6 name=User^3 .CMD DEFINE_FONTSTYLE_NAME fontID=7 name=User^4 .CMD DEFINE_FONTSTYLE_NAME fontID=8 name=User^5 .CMD DEFINE_FONTSTYLE_NAME fontID=9 name=User^6 .CMD DEFINE_FONTSTYLE_NAME fontID=10 name=User^7 .CMD DEFINE_FONTSTYLE fontID=0 family=Times^New^Roman points=10 bold=0 italic=0 underline=0 .CMD DEFINE_FONTSTYLE fontID=1 family=Times^New^Roman points=10 bold=0 italic=0 underline=0 .CMD DEFINE_FONTSTYLE fontID=2 family=Arial points=10 bold=0 italic=0 underline=0 .CMD DEFINE_FONTSTYLE fontID=4 family=Arial points=10 bold=0 italic=0 underline=0 .CMD DEFINE_FONTSTYLE fontID=5 family=Courier^New points=10 bold=0 italic=0 underline=0 .CMD DEFINE_FONTSTYLE fontID=6 family=System points=10 bold=0 italic=0 underline=0 .CMD DEFINE_FONTSTYLE fontID=7 family=Script points=10 bold=0 italic=0 underline=0 .CMD DEFINE_FONTSTYLE fontID=8 family=Roman points=10 bold=0 italic=0 underline=0 .CMD DEFINE_FONTSTYLE fontID=9 family=Modern points=10 bold=0 italic=0 underline=0 .CMD DEFINE_FONTSTYLE fontID=10 family=Times^New^Roman points=10 bold=0 italic=0 underline=0 .CMD UNITS U=1 .CMD DIMENSIONS_ANALYSIS 0 0 .TXT 3 1 19 0 Cg a73.375000,73.000000,2637 {\rtf1\ansi \deff0 {\fonttbl {\f0\fnil Arial;} 
{\f1\fnil Courier New;} } {\plain {\fs20 \fs32 Probabilities for matching things}{\fs20 \par \par Mathcad 6.0 plus worksheet to describe calculation of the probabilities of getting a certain number of matched cases in the "categorical mismatched cases" paradigm for determining whether there is statistically significant information being contained in case descriptions. This can be viewed using Mathbrowser and is mounted on the WWW as:\par http://www.sghms.ac.uk/mhs/psychotherapy/mch/dichot1.mcd\par }{\i \fs20 It is mounted as copyleft. I.e. I, Chris Evans, claim copyright; however, I am very happy for anyone to duplicate it however they like provided they do not change it in any way (other than tinkering with any of the parameters you wish, which is the full glory of the Mathcad/Mathbrowser capability). Try modelling other mismatchings and other scores as you go through. If you want to change it or tell me about errors contact me at:\par Psychotherapy Section, St. George's Hospital Medical School, Cranmer Terrace,\par London SW17 0RE Britain Email: C.Evans@sghms.ac.u}{\fs20 k\par \par N.B. The maths of this can be used in other scenarios; this worksheet was actually prepared to determine the statistical significances for matching "birth stories" of patients against their own Cognitive Analytic Therapy (C.A.T.) self-descriptions, which had been elicited independently of the birth stories. Each judge was given 16 birth stories and for each was given the self-description of the patient and a randomly selected self-description of another patient. The possible matching score clearly ranges between zero and 16.\par \par The probability of achieving a perfect 16 out of 16 match (or n out of n for the more general case) is easy. 
Assuming that the null model is of random matching and taking a one-tailed approach of discounting the probability of significantly }{\fs20 \i bad }{\fs20 matching the probability must be .5 (random matching) raised to the power n (here .5^16=0.00001526).\par \par The probabilities of lesser matches (0 <= r <= n) form a classical statistical distribution: the Binomial (see e.g. Evans (no relation!), Hastings & Peacock (1993)). In this distribution each score r has probability equal to the number of different ways of achieving that score multiplied by the probability of r correct (p^r) and the probability of (n-r) incorrect ((1-p)^(n-r) = p^(n-r) since p = .5 = (1-p)).\par \par The number of ways getting that score is n!/r!(n-r)! where the "!" sign indicates a "factorial":\par n! = 1.2.3. .... (n-3).(n-2).(n-1).n\par where 0! = 1 by convention, 1! = 1, 2! = 2, 3! = 6, 4! = 24 etc.\par \par O.K. now for the Mathcad formulae}} } .EQN 80 0 3 0 {0:comb}NAME({0:r}NAME,{0:n}NAME):{0:if}NAME({0:n}NAME<{0:r}NAME,0,(({0:n}NAME)!)/(({0:r}NAME)!*(({0:n}NAME-{0:r}NAME))!)) .EQN 4 0 23 0 {0:factorial}NAME(2)={0}?_n_u_l_l_ .EQN 0 13 24 0 {0:factorial}NAME(3)={0}?_n_u_l_l_ .TXT 0 15 25 0 Cg a7.125000,17.000000,18 {\rtf1\ansi \deff0 {\fonttbl {\f0\fnil Arial;} {\f1\fnil Courier New;} } {\plain {\fs20 Checking!}} } .EQN 3 -58 37 0 {0:factorial}NAME(0)={0}?_n_u_l_l_ .EQN 0 15 38 0 {0:factorial}NAME(1)={0}?_n_u_l_l_ .EQN 0 15 26 0 {0:comb}NAME(2,3)={0}?_n_u_l_l_ .EQN 0 13 27 0 {0:comb}NAME(2,4)={0}?_n_u_l_l_ .TXT 0 15 28 0 Cg a11.000000,17.000000,23 {\rtf1\ansi \deff0 {\fonttbl {\f0\fnil Arial;} {\f1\fnil Courier New;} } {\plain {\fs20 More checking!}} } .EQN 3 -28 29 0 {0:binomial}NAME({0:p}NAME,{0:r}NAME,{0:n}NAME):{0:comb}NAME({0:r}NAME,{0:n}NAME)*({0:p}NAME)^({0:r}NAME)*((1-{0:p}NAME))^(({0:n}NAME-{0:r}NAME)) .EQN 1 -30 42 0 {0:comb}NAME(1,2)={0}?_n_u_l_l_ .EQN 0 15 43 0 {0:comb}NAME(1,3)={0}?_n_u_l_l_ .TXT 3 15 30 0 Cg a68.250000,73.000000,173 {\rtf1\ansi \deff0 {\fonttbl 
{\f0\fnil Arial;} {\f1\fnil Courier New;} } {\plain {\fs20 Now lets consider the possibility (achieved by the researcher herself in the study described) of correctly matching all 16 birth stories to their self-descriptions}} } .TXT 2 -30 51 0 Cg a26.875000,73.000000,47 {\rtf1\ansi \deff0 {\fonttbl {\f0\fnil Arial;} {\f1\fnil Courier New;} } {\plain {\fs20 Now to define the binomial probability}} } .TXT 3 30 59 0 Cg a29.000000,44.000000,53 {\rtf1\ansi \deff0 {\fonttbl {\f0\fnil Arial;} {\f1\fnil Courier New;} } {\plain {\fs20 That looks pretty statistically significant!}} } .TXT 3 0 32 0 Cg a54.250000,73.000000,84 {\rtf1\ansi \deff0 {\fonttbl {\f0\fnil Arial;} {\f1\fnil Courier New;} } {\plain {\fs20 The probability of a lower score can be calculated in exactly the same way:}} } .TXT 3 0 33 0 Cg a71.500000,73.000000,207 {\rtf1\ansi \deff0 {\fonttbl {\f0\fnil Arial;} {\f1\fnil Courier New;} } {\plain {\fs20 However, we're really interested in the probability of doing as well, or better, than this so we need a cumulative probability for that score and any higher score. 
Another definition will do this:}} } .EQN 7 -30 60 0 {0:binomial}NAME(.5,16,16)={0}?_n_u_l_l_ .EQN 1 30 34 0 {0:cum_bin}NAME({0:p}NAME,{0:r}NAME,{0:n}NAME):{0:if}NAME({0:r}NAME>{0:n}NAME,0,(({0:r}NAME,{0:n}NAME,{0:x}NAME,{0:binomial}NAME({0:p}NAME,{0:x}NAME,{0:n}NAME)){64})) .EQN 6 -30 65 0 {0:binomial}NAME(.5,15,16)={0}?_n_u_l_l_ .TXT 1 30 42 0 Cg a53.125000,73.125000,76 {\rtf1\ansi \deff0 {\fonttbl {\f0\fnil Arial;} {\f1\fnil Courier New;} } {\plain Let's check that works for the silly option of zero or more correct matches} } .TXT 3 25 45 0 Cg a1.250000,48.125000,2 {\rtf1\ansi \deff0 {\fonttbl {\f0\fnil Arial;} {\f1\fnil Courier New;} } {\plain \par } } .EQN 1 -25 44 0 {0:cum_bin}NAME(.5,0,15)={0}?_n_u_l_l_ .TXT 0 34 47 0 Cg a4.750000,39.125000,9 {\rtf1\ansi \deff0 {\fonttbl {\f0\fnil Arial;} {\f1\fnil Courier New;} } {\plain {Grand!}} } .TXT 4 -34 48 0 Cg a27.000000,73.125000,38 {\rtf1\ansi \deff0 {\fonttbl {\f0\fnil Arial;} {\f1\fnil Courier New;} } {\plain Now let's do some other silly checks:} } .EQN 4 0 49 0 {0:cum_bin}NAME(.5,1,2)={0}?_n_u_l_l_ .EQN 0 29 50 0 {0:cum_bin}NAME(.5,2,2)={0}?_n_u_l_l_ .EQN 3 -29 52 0 {0:test1}NAME:{0:cum_bin}NAME(.5,16,16) .EQN 0 29 53 0 {0:test1}NAME={0}?_n_u_l_l_ .EQN 3 -29 54 0 {0:test2}NAME:{0:binomial}NAME(.5,15,16) .EQN 0 29 56 0 {0:test2}NAME={0}?_n_u_l_l_ .EQN 2 -59 134 0 {0:cum_bin}NAME(.5,16,16)={0}?_n_u_l_l_ .EQN 1 30 57 0 {0:test2}NAME+{0:test1}NAME={0}?_n_u_l_l_ .EQN 0 29 58 0 {0:cum_bin}NAME(.5,15,16)={0}?_n_u_l_l_ .EQN 3 -59 137 0 {0:cum_bin}NAME(.5,15,16)={0}?_n_u_l_l_ .EQN 6 0 139 0 2.441*(10)^(-4)+1.526*(10)^(-5)={0}?_n_u_l_l_ .TXT 5 29 20 0 Cg a73.375000,73.000000,121 {\rtf1\ansi \deff0 {\fonttbl {\f0\fnil Arial;} {\f1\fnil Courier New;} } {\plain {\fs20 In the experiment the researcher achieved the perfect 16 out of 16 and the independent rater achieved 12 of 16:}} } .EQN 5 1 60 0 {0:cum_bin}NAME(.5,12,16)={0}?_n_u_l_l_ .TXT 0 25 21 0 Cg a19.250000,47.000000,40 {\rtf1\ansi \deff0 {\fonttbl {\f0\fnil Arial;} 
{\f1\fnil Courier New;} } {\plain {\fs20 Still statistically significant}} } .EQN 1 -55 142 0 {0:cum_bin}NAME(.5,12,16)={0}?_n_u_l_l_ .TXT 5 29 22 0 Cg a49.750000,73.000000,79 {\rtf1\ansi \deff0 {\fonttbl {\f0\fnil Arial;} {\f1\fnil Courier New;} } {\plain {\fs20 Let's get Mathcad to tabulate all the possible scores for n=1 to n=20:}} } .EQN 4 1 61 0 {0:n}NAME:1;20 .EQN 1 -30 145 0 {0:n}NAME:1,2;20 .EQN 0 12 146 0 {0:r}NAME:0,1;20 .EQN 3 18 63 0 {0:r}NAME:0;20 .EQN 1 -30 147 0 ({0:z}NAME)[({0:n}NAME,{0:r}NAME):{0:cum_bin}NAME(.5,{0:r}NAME,{0:n}NAME) .EQN 3 30 64 0 ({0:z}NAME)[({0:n}NAME,{0:r}NAME):{0:cum_bin}NAME(.5,{0:r}NAME,{0:n}NAME) .EQN 19 -30 148 0 {0:z}NAME={0}?_n_u_l_l_ .EQN 2 30 65 0 {0:z}NAME={0 0 0 21 21 168 71}?_n_u_l_l_ .TXT 51 0 156 0 C x1,1,0,0 .TXT 5 2 149 0 Cg a69.000000,71.000000,201 {\rtf1\ansi \deff0 {\fonttbl {\f0\fnil Arial;} {\f1\fnil Courier New;} } {\plain {\fs20 As a final thought: consider matching against a number of mismatched cases i.e. only one true match but two distractors. In this case the pertinent p for the null model is .33 rather than .5}} } .EQN 6 0 150 0 ({0:y}NAME)[({0:n}NAME,{0:r}NAME):{0:cum_bin}NAME((1)/(3),{0:r}NAME,{0:n}NAME) .EQN 25 0 151 0 {0:y}NAME={0 0 0 21 19 149 65}?_n_u_l_l_ .TXT 45 -2 159 0 C x1,1,0,0 .EQN 7 2 158 0 {0:z}NAME{70 35 0 60 100 39 37 0 3 1 0 -1 1}{57} .TXT 6 39 153 0 Cg a31.375000,32.000000,260 {\rtf1\ansi \deff0 {\fonttbl {\f0\fnil Arial;} {\f1\fnil Courier New;} } {\plain {\fs20 This shows the dichotomous case\par The z axis (coming R to L toward you)\par shows the number of cases to be matched\par The x axis (back to forward L to R) shows the score and\par the y axis shows the probability of scoring \par more than or equal to that score }} } .EQN 52 -39 66 0 {0:y}NAME{70 35 0 60 100 39 37 0 3 1 0 -1 1}{57} .TXT 3 40 161 0 Cg a30.750000,32.000000,181 {\rtf1\ansi \deff0 {\fonttbl {\f0\fnil Arial;} {\f1\fnil Courier New;} } {\plain {\fs20 This shows the trichotomous case, i.e. 
where there are two possible mismatches for each match. You can see how the probabilities fall away faster as the scores increase}} }