%!PS-Adobe-2.0 %%Creator: dvips 5.83 (MiKTeX 1.20) Copyright 1998 Radical Eye Software %%Title: forensics.dvi %%CreationDate: Fri Apr 16 11:20:19 1999 %%Pages: 6 %%PageOrder: Ascend %%BoundingBox: 0 0 596 842 %%DocumentFonts: Times-Roman Times-Bold Times-Italic %%EndComments %DVIPSWebPage: (www.radicaleye.com) %DVIPSCommandLine: dvips forensics.dvi -o forensics.ps %DVIPSParameters: dpi=600, compressed %DVIPSSource: TeX output 1999.04.16:1120 %%BeginProcSet: texc.pro %! /TeXDict 300 dict def TeXDict begin/N{def}def/B{bind def}N/S{exch}N/X{S N}B/A{dup}B/TR{translate}N/isls false N/vsize 11 72 mul N/hsize 8.5 72 mul N/landplus90{false}def/@rigin{isls{[0 landplus90{1 -1}{-1 1}ifelse 0 0 0]concat}if 72 Resolution div 72 VResolution div neg scale isls{ landplus90{VResolution 72 div vsize mul 0 exch}{Resolution -72 div hsize mul 0}ifelse TR}if Resolution VResolution vsize -72 div 1 add mul TR[ matrix currentmatrix{A A round sub abs 0.00001 lt{round}if}forall round exch round exch]setmatrix}N/@landscape{/isls true N}B/@manualfeed{ statusdict/manualfeed true put}B/@copies{/#copies X}B/FMat[1 0 0 -1 0 0] N/FBB[0 0 0 0]N/nn 0 N/IEn 0 N/ctr 0 N/df-tail{/nn 8 dict N nn begin /FontType 3 N/FontMatrix fntrx N/FontBBox FBB N string/base X array /BitMaps X/BuildChar{CharBuilder}N/Encoding IEn N end A{/foo setfont}2 array copy cvx N load 0 nn put/ctr 0 N[}B/sf 0 N/df{/sf 1 N/fntrx FMat N df-tail}B/dfs{div/sf X/fntrx[sf 0 0 sf neg 0 0]N df-tail}B/E{pop nn A definefont setfont}B/Cw{Cd A length 5 sub get}B/Ch{Cd A length 4 sub get }B/Cx{128 Cd A length 3 sub get sub}B/Cy{Cd A length 2 sub get 127 sub} B/Cdx{Cd A length 1 sub get}B/Ci{Cd A type/stringtype ne{ctr get/ctr ctr 1 add N}if}B/id 0 N/rw 0 N/rc 0 N/gp 0 N/cp 0 N/G 0 N/CharBuilder{save 3 1 roll S A/base get 2 index get S/BitMaps get S get/Cd X pop/ctr 0 N Cdx 0 Cx Cy Ch sub Cx Cw add Cy setcachedevice Cw Ch true[1 0 0 -1 -.1 Cx sub Cy .1 sub]/id Ci N/rw Cw 7 add 8 idiv string N/rc 0 N/gp 0 N/cp 0 N{ rc 0 ne{rc 1 sub/rc X rw}{G}ifelse}imagemask restore}B/G{{id gp get/gp gp 1 add N A 18 mod S 18 idiv pl S get exec}loop}B/adv{cp add/cp X}B /chg{rw cp id gp 4 index getinterval putinterval A gp add/gp X adv}B/nd{ /cp 0 N rw exit}B/lsh{rw cp 2 copy get A 0 eq{pop 1}{A 255 eq{pop 254}{ A A add 255 and S 1 and or}ifelse}ifelse put 1 adv}B/rsh{rw cp 2 copy get A 0 eq{pop 128}{A 255 eq{pop 127}{A 2 idiv S 128 and or}ifelse} ifelse put 1 adv}B/clr{rw cp 2 index string putinterval adv}B/set{rw cp fillstr 0 4 index getinterval putinterval adv}B/fillstr 18 string 0 1 17 {2 copy 255 put pop}for N/pl[{adv 1 chg}{adv 1 chg nd}{1 add chg}{1 add chg nd}{adv lsh}{adv lsh nd}{adv rsh}{adv rsh nd}{1 add adv}{/rc X nd}{ 1 add set}{1 add clr}{adv 2 chg}{adv 2 chg nd}{pop nd}]A{bind pop} forall N/D{/cc X A type/stringtype ne{]}if nn/base get cc ctr put nn /BitMaps get S ctr S sf 1 ne{A A length 1 sub A 2 index S get sf div put }if put/ctr ctr 1 add N}B/I{cc 1 add D}B/bop{userdict/bop-hook known{ bop-hook}if/SI save N @rigin 0 0 moveto/V matrix currentmatrix A 1 get A mul exch 0 get A mul add .99 lt{/QV}{/RV}ifelse load def pop pop}N/eop{ SI restore userdict/eop-hook known{eop-hook}if showpage}N/@start{ userdict/start-hook known{start-hook}if pop/VResolution X/Resolution X 1000 div/DVImag X/IEn 256 array N 2 string 0 1 255{IEn S A 360 add 36 4 index cvrs cvn put}for pop 65781.76 div/vsize X 65781.76 div/hsize X}N /p{show}N/RMat[1 0 0 -1 0 0]N/BDot 260 string N/Rx 0 N/Ry 0 N/V{}B/RV/v{ /Ry X/Rx X V}B statusdict begin/product where{pop false[(Display)(NeXT) (LaserWriter 16/600)]{A length product length le{A length product exch 0 exch getinterval eq{pop true exit}if}{pop}ifelse}forall}{false}ifelse end{{gsave TR -.1 .1 TR 1 1 scale Rx Ry false RMat{BDot}imagemask grestore}}{{gsave TR -.1 .1 TR Rx Ry scale 1 1 false RMat{BDot} imagemask grestore}}ifelse B/QV{gsave newpath transform round exch round exch itransform moveto Rx 0 rlineto 0 Ry neg rlineto Rx neg 0 rlineto fill grestore}B/a{moveto}B/delta 0 N/tail{A/delta X 0 rmoveto}B/M{S p delta add tail}B/b{S p tail}B/c{-4 M}B/d{-3 M}B/e{-2 M}B/f{-1 M}B/g{0 M} B/h{1 M}B/i{2 M}B/j{3 M}B/k{4 M}B/w{0 rmoveto}B/l{p -4 w}B/m{p -3 w}B/n{ p -2 w}B/o{p -1 w}B/q{p 1 w}B/r{p 2 w}B/s{p 3 w}B/t{p 4 w}B/x{0 S rmoveto}B/y{3 2 roll p a}B/bos{/SS save N}B/eos{SS restore}B end %%EndProcSet %%BeginProcSet: 8r.enc % @@psencodingfile@{ % author = "S. Rahtz, P. MacKay, Alan Jeffrey, B. Horn, K. Berry", % version = "0.6", % date = "1 July 1998", % filename = "8r.enc", % email = "tex-fonts@@tug.org", % docstring = "Encoding for TrueType or Type 1 fonts % to be used with TeX." % @} % % Idea is to have all the characters normally included in Type 1 fonts % available for typesetting. This is effectively the characters in Adobe % Standard Encoding + ISO Latin 1 + extra characters from Lucida. % % Character code assignments were made as follows: % % (1) the Windows ANSI characters are almost all in their Windows ANSI % positions, because some Windows users cannot easily reencode the % fonts, and it makes no difference on other systems. The only Windows % ANSI characters not available are those that make no sense for % typesetting -- rubout (127 decimal), nobreakspace (160), softhyphen % (173). quotesingle and grave are moved just because it's such an % irritation not having them in TeX positions. % % (2) Remaining characters are assigned arbitrarily to the lower part % of the range, avoiding 0, 10 and 13 in case we meet dumb software. % % (3) Y&Y Lucida Bright includes some extra text characters; in the % hopes that other PostScript fonts, perhaps created for public % consumption, will include them, they are included starting at 0x12. % % (4) Remaining positions left undefined are for use in (hopefully) % upward-compatible revisions, if someday more characters are generally % available. % % (5) hyphen appears twice for compatibility with both % ASCII and Windows. % /TeXBase1Encoding [ % 0x00 (encoded characters from Adobe Standard not in Windows 3.1) /.notdef /dotaccent /fi /fl /fraction /hungarumlaut /Lslash /lslash /ogonek /ring /.notdef /breve /minus /.notdef % These are the only two remaining unencoded characters, so may as % well include them. /Zcaron /zcaron % 0x10 /caron /dotlessi % (unusual TeX characters available in, e.g., Lucida Bright) /dotlessj /ff /ffi /ffl /.notdef /.notdef /.notdef /.notdef /.notdef /.notdef /.notdef /.notdef % very contentious; it's so painful not having quoteleft and quoteright % at 96 and 145 that we move the things normally found there to here. /grave /quotesingle % 0x20 (ASCII begins) /space /exclam /quotedbl /numbersign /dollar /percent /ampersand /quoteright /parenleft /parenright /asterisk /plus /comma /hyphen /period /slash % 0x30 /zero /one /two /three /four /five /six /seven /eight /nine /colon /semicolon /less /equal /greater /question % 0x40 /at /A /B /C /D /E /F /G /H /I /J /K /L /M /N /O % 0x50 /P /Q /R /S /T /U /V /W /X /Y /Z /bracketleft /backslash /bracketright /asciicircum /underscore % 0x60 /quoteleft /a /b /c /d /e /f /g /h /i /j /k /l /m /n /o % 0x70 /p /q /r /s /t /u /v /w /x /y /z /braceleft /bar /braceright /asciitilde /.notdef % rubout; ASCII ends % 0x80 /.notdef /.notdef /quotesinglbase /florin /quotedblbase /ellipsis /dagger /daggerdbl /circumflex /perthousand /Scaron /guilsinglleft /OE /.notdef /.notdef /.notdef % 0x90 /.notdef /.notdef /.notdef /quotedblleft /quotedblright /bullet /endash /emdash /tilde /trademark /scaron /guilsinglright /oe /.notdef /.notdef /Ydieresis % 0xA0 /.notdef % nobreakspace /exclamdown /cent /sterling /currency /yen /brokenbar /section /dieresis /copyright /ordfeminine /guillemotleft /logicalnot /hyphen % Y&Y (also at 45); Windows' softhyphen /registered /macron % 0xD0 /degree /plusminus /twosuperior /threesuperior /acute /mu /paragraph /periodcentered /cedilla /onesuperior /ordmasculine /guillemotright /onequarter /onehalf /threequarters /questiondown % 0xC0 /Agrave /Aacute /Acircumflex /Atilde /Adieresis /Aring /AE /Ccedilla /Egrave /Eacute /Ecircumflex /Edieresis /Igrave /Iacute /Icircumflex /Idieresis % 0xD0 /Eth /Ntilde /Ograve /Oacute /Ocircumflex /Otilde /Odieresis /multiply /Oslash /Ugrave /Uacute /Ucircumflex /Udieresis /Yacute /Thorn /germandbls % 0xE0 /agrave /aacute /acircumflex /atilde /adieresis /aring /ae /ccedilla /egrave /eacute /ecircumflex /edieresis /igrave /iacute /icircumflex /idieresis % 0xF0 /eth /ntilde /ograve /oacute /ocircumflex /otilde /odieresis /divide /oslash /ugrave /uacute /ucircumflex /udieresis /yacute /thorn /ydieresis ] def %%EndProcSet %%BeginProcSet: texps.pro %! TeXDict begin/rf{findfont dup length 1 add dict begin{1 index/FID ne 2 index/UniqueID ne and{def}{pop pop}ifelse}forall[1 index 0 6 -1 roll exec 0 exch 5 -1 roll VResolution Resolution div mul neg 0 0]/Metrics exch def dict begin Encoding{exch dup type/integertype ne{pop pop 1 sub dup 0 le{pop}{[}ifelse}{FontMatrix 0 get div Metrics 0 get div def} ifelse}forall Metrics/Metrics currentdict end def[2 index currentdict end definefont 3 -1 roll makefont/setfont cvx]cvx def}def/ObliqueSlant{ dup sin S cos div neg}B/SlantFont{4 index mul add}def/ExtendFont{3 -1 roll mul exch}def/ReEncodeFont{CharStrings rcheck{/Encoding false def dup[exch{dup CharStrings exch known not{pop/.notdef/Encoding true def} if}forall Encoding{]exch pop}{cleartomark}ifelse}if/Encoding exch def} def end %%EndProcSet TeXDict begin 39158280 55380996 1000 600 600 (d:\Users\argray\work\publications\Papers\1999\Forensics (ICONIP)/forensics.dvi) @start %DVIPSBitmapFont: Fa cmsy10 10 1 /Fa 1 121 df<137E3801FFC03807C1E0380F0070001E1338003E131C48130C141E147E 5AA3143C1400A3127CA37E121E7E6C7E6C7EEA00F013FCEA03FF380F8780381F01E0003E 13F0EB00F848137CA200FC133E5A141FA6127C143F6C133EA26C137CEA0F80000713F838 01E1F03800FFC0EB3F00130FEB03C0EB01E0EB00F01478147C143EA3141FA3123C127EA3 143E127812300038137C6C13786C13F0380783E03803FF8038007E00184C7ABA25>120 D E %EndDVIPSBitmapFont /Fb 134[37 1[55 1[42 23 32 32 1[42 42 42 60 23 2[23 42 42 23 37 42 37 1[42 12[46 42 2[51 1[55 3[37 28 3[51 1[55 1[51 6[28 42 42 42 42 4[42 3[28 3[28 28 28 65 38[{ TeXBase1Encoding ReEncodeFont}39 83.022 /Times-Italic rf /Fc 134[50 1[72 1[55 33 39 44 1[55 50 55 83 28 55 1[28 1[50 1[44 55 44 55 50 18[72 94 8[72 72 14[50 50 50 50 2[25 33 45[{TeXBase1Encoding ReEncodeFont}29 99.6264 /Times-Bold rf /Fd 105[42 1[37 37 24[37 42 42 60 42 42 23 32 28 42 42 42 42 65 23 42 23 23 42 42 28 37 42 37 42 37 3[28 1[28 1[60 1[78 60 60 51 46 55 60 46 60 60 74 51 60 1[28 60 60 46 51 60 55 55 60 1[37 3[23 23 42 42 42 42 42 42 42 42 42 42 23 21 28 21 47 1[28 28 28 1[69 1[42 31[46 46 2[{TeXBase1Encoding ReEncodeFont}79 83.022 /Times-Roman rf /Fe 138[66 40 47 53 66 66 60 66 100 33 2[33 66 1[40 53 66 53 66 60 12[80 1[86 8[47 5[86 1[86 11[60 60 60 60 60 3[40 45[{TeXBase1Encoding ReEncodeFont}29 119.552 /Times-Bold rf /Ff 133[44 50 50 72 50 50 28 39 33 1[50 50 50 78 28 2[28 50 50 33 44 50 44 1[44 6[61 4[72 1[55 66 1[55 72 72 89 61 2[33 1[72 2[72 1[66 72 92 6[50 50 50 50 50 50 50 1[50 2[25 1[25 56 1[33 33 40[{ TeXBase1Encoding ReEncodeFont}50 99.6264 /Times-Roman rf /Fg 134[72 1[104 1[72 40 56 48 1[72 72 72 112 40 72 1[40 72 72 48 64 72 64 72 64 13[80 96 1[80 1[104 128 6[80 1[104 96 96 104 19[48 36 44[{TeXBase1Encoding ReEncodeFont}33 143.462 /Times-Roman rf end %%EndProlog %%BeginSetup %%Feature: *Resolution 600dpi TeXDict begin %%PaperSize: A4 %%EndSetup %%Page: 1 1 1 0 bop 9 422 a Fg(Softw)o(are)34 b(F)n(orensics)f(for)j (Discriminating)c(between)i(Program)g(Authors)98 605 y(using)h(Case-Based)e(Reasoning,)g(Feed-F)n(orw)o(ard)f(Neural)i(Netw) o(orks)850 788 y(and)h(Multiple)f(Discriminant)f(Analysis)331 1028 y Ff(Stephen)25 b(G.)g(MacDonell,)f(Andre)n(w)g(R.)h(Gray)-6 b(,)25 b(Grant)g(MacLennan,)f(and)h(Philip)f(Sallis)1173 1145 y(Department)h(of)g(Information)e(Science)1484 1261 y(Uni)n(v)o(ersity)f(of)j(Otago)1172 1377 y(PO)g(Box)g(56,)f(Dunedin,)g (Ne)n(w)h(Zealand)988 1493 y(+64)f(3)h(4798135)f(\(phone\))g(+64)h(3)g (4798311)e(\(f)o(ax\))1185 1609 y(ste)n(v)o (emac@infoscience.otago.ac.nz)-9 1935 y Fe(Abstract)-9 2121 y Fd(Softw)o(are)37 b(forensics)g(is)i(a)f(research)f(\002eld)h (that,)43 b(by)37 b(treat-)-9 2220 y(ing)20 b(pieces)h(of)f(program)f (source)h(code)g(as)h(linguistically)f(and)-9 2320 y(stylistically)25 b(analyzable)g(entities,)h(attempts)g(to)g(in)m(v)o(estigate)-9 2420 y(aspects)h(of)g(computer)f(program)f(authorship.)44 b(This)28 b(can)f(be)-9 2519 y(performed)g(with)j(the)g(goal)g(of)g (identi\002cation,)h(discrimina-)-9 2619 y(tion,)h(or)e (characterization)f(of)h(authors.)55 b(In)30 b(this)h(paper)e(we)-9 2718 y(e)o(xtract)34 b(a)i(set)g(of)f(26)g(standard)f(authorship)g (metrics)h(from)-9 2818 y(351)23 b(programs)g(by)h(7)h(dif)n(ferent)e (authors.)36 b(The)25 b(use)f(of)h(feed-)-9 2918 y(forw)o(ard)33 b(neural)g(netw)o(orks,)k(multiple)d(discriminant)f(anal-)-9 3017 y(ysis,)g(and)d(case-based)f(reasoning)g(is)j(then)d(in)m(v)o (estigated)g(in)-9 3117 y(terms)21 b(of)g(classi\002cation)h(accurac)o (y)e(for)h(the)g(authors)g(on)g(both)-9 3217 y(training)k(and)h (testing)g(samples.)43 b(The)26 b(\002rst)i(tw)o(o)e(techniques)-9 3316 y(produce)31 b(remarkably)g(similar)j(results,)j(with)d(the)f (best)h(re-)-9 3416 y(sults)j(coming)e(from)h(the)h(case-based)f (reasoning)f(models.)-9 3515 y(All)16 b(techniques)f(ha)n(v)o(e)g(high) h(prediction)e(accurac)o(y)g(rates,)j(sup-)-9 3615 y(porting)h(the)i (feasibility)g(of)g(the)g(task)h(of)f(discriminating)e(pro-)-9 3715 y(gram)h(authors)g(based)h(on)g(source-code)d(measurements.)-9 3995 y Fe(1)119 b(Intr)n(oduction)-9 4181 y Fd(In)28 b(a)h(surprisingly)e(lar)o(ge)h(number)f(of)i(situations)f(there)h(is)g (a)-9 4281 y(need)22 b(to)h(in)m(v)o(estigate)f(the)h(nature)f(of)h(a)g (computer)f(program')-5 b(s)-9 4381 y(authorship.)24 b(By)c(this)i(it)f(is)g(meant,)f(that)h(there)f(is)h(some)f(ques-)-9 4480 y(tion)k(concerning)e(the)j(authorship)e(of)i(a)g(series)g(of)g (programs)-9 4580 y(or)j(alternati)n(v)o(ely)g(the)h(characteristics)g (of)g(program)e(authors)-9 4679 y([3)o(].)115 4779 y(The)d(most)g (widely)g(kno)n(wn)f(e)o(xample)g(is)i(plagiarism)e(de-)-9 4879 y(tection)j(in)g(an)h(academic)e(setting)h(where)g(students')g (assign-)-9 4978 y(ments)17 b(can)h(be)g(compared)e(to)i(see)g(if)g (some)g(are)g(\223suspiciously)-9 5078 y(similar\224)k([7)o(].)33 b(The)22 b(incidence)g(of)g(highly)g(similar)h(programs)-9 5178 y(can)17 b(pro)o(vide)e(suggesti)n(v)o(e)i(e)n(vidence)f(that)h (one)g(student')-5 b(s)18 b(code)-9 5277 y(may)k(ha)n(v)o(e)h(been)g (deri)n(v)o(ed)f(from)g(another')-5 b(s.)34 b(This)23 b(particular)-9 5377 y(area)k(of)h(research)e(pro)o(vided)g(the)i (origins)e(of)i(the)g(ideas)g(that)-9 5477 y(no)n(w)i(mak)o(e)g(up)g (the)h(\002eld)f(of)h(softw)o(are)f(forensics)g(\226)g(which)-9 5576 y(is)c(de\002ned)f(here)h(as)g(the)g(study)g(of)g(program)d (characteristics)1939 1935 y(with)16 b(the)g(intention)f(of)g (identifying,)g(e)o(xamining,)f(or)h(discrim-)1939 2035 y(inating)20 b(between)f(program)f(authors)h([1)o(].)2064 2152 y(Softw)o(are)34 b(forensics)h(also)g(includes)g(the)g(areas)g(of) g(au-)1939 2252 y(thorship)f(characterization,)j(as)e(in)h (psychological)c(studies)1939 2352 y(of)25 b(the)g(relationship)f (between)g(programmer)e(attrib)n(utes)j(and)1939 2451 y(their)34 b(code)f(and)h(between)f(programming)e(conditions)h(and)1939 2551 y(code.)60 b(The)31 b(analysis)h(of)g(malicious)f(code)h(\(such)f (as)h(com-)1939 2651 y(puter)24 b(viruses,)h(softw)o(are)f(trapdoors,)f (and)h(trojan)f(horses\))h(is)1939 2750 y(another)31 b(application)f(area,)k(although)d(this)h(in)m(v)n(olv)o(es)e(more)1939 2850 y(subjecti)n(v)o(e)24 b(analysis)h([6)o(].)40 b(Other)25 b(applications)e(of)i(softw)o(are)1939 2949 y(forensics)f(include)f (quality)g(control)g(\(through)e(coding)i(stan-)1939 3049 y(dards)f(for)g(e)o(xample,)g(c)o(yclomatic)f(comple)o(xity)g(or)h (comment)1939 3149 y(density)30 b(which)g(can)g(be)g(used)g(as)h(an)f (indicator)f(of)g(internal)1939 3248 y(documentation)d(quality\),)k (author)d(tracking)h(\(for)f(e)o(xample,)1939 3348 y(determining)36 b(the)h(author)g(of)g(code)g(of)g(unkno)n(wn)e(origin\),)1939 3448 y(change)20 b(control)g(\(tracking)g(the)h(authorship)e(of)i (changes)f(and)1939 3547 y(quality)25 b(control)f(when)h(making)f (changes\),)h(and)f(o)n(wnership)1939 3647 y(disputes.)2064 3765 y(While)30 b(the)f(idea)g(of)h(dissenting)e(the)i(structure)e(and) h(na-)1939 3864 y(ture)20 b(of)f(programs)f(to)h(discern)g(some)h (information)d(about)i(the)1939 3964 y(lik)o(ely)g(author)e(or)i (authors)e(and/or)h(their)g(characteristics)g(may)1939 4064 y(appear)27 b(some)n(what)f(esoteric,)j(perhaps)d(e)n(v)o(en)h (unrealistic,)h(it)1939 4163 y(has)20 b(been)f(sho)n(wn)g(that)h(such)f (acti)n(vities)h(are)g(feasible,)f(at)h(least)1939 4263 y(under)f(certain)g(circumstances)g([2)o(].)25 b(In)19 b(f)o(act)h(man)o(y)f(measure-)1939 4362 y(ments)27 b(can)g(be)g(dif)n (\002cult)g(for)f(programmers)f(to)i(change)f([6)o(].)1939 4462 y(An)i(open)e(question)g(is)i(ho)n(w)f(such)g(models)g(should)f (be)i(con-)1939 4562 y(structed)j(to)h(best)g(represent)f(the)h (mappings)e(between)h(pro-)1939 4661 y(gram)19 b(features,)g(authors,)g (and)g(the)h(authors')e(characteristics.)2064 4779 y(In)31 b(this)g(paper)f(the)h(focus)f(will)i(be)e(on)h(the)g(area)f(of)h(de-) 1939 4879 y(v)o(eloping)22 b(models)g(that)i(are)f(capable)f(of)h (discriminating)f(be-)1939 4978 y(tween)36 b(se)n(v)o(eral)g(authors)f (using)h(source-code)d(based)j(mea-)1939 5078 y(surements.)50 b(The)28 b(measurements)f(that)h(are)h(preferred)d(here)1939 5178 y(are)45 b(those)g(that)g(can)g(be)g(automatically)f(e)o(xtracted) g(from)1939 5277 y(source)29 b(code)f(by)h(pattern)f(matching)g (algorithms)f(since)j(the)1939 5377 y(v)n(olumes)19 b(of)h(data)g (needed)e(for)h(these)h(applications)f(will)i(gen-)1939 5477 y(erally)i(surpass)h(con)m(v)o(enient)d(human)g(measurement.)33 b(Appli-)1939 5576 y(cations)d(for)f(such)h(authorship)e (discrimination)g(procedures)p eop %%Page: 2 2 2 1 bop -9 74 a Fd(include)28 b(plagiarism)g(detection,)j(o)n(wnership) c(disputes,)k(and)-9 173 y(the)20 b(psychological)e(study)h(of)h (programmers.)-9 516 y Fe(2)119 b(T)-11 b(echniques)63 b(f)m(or)e(authorship)h(dis-)170 666 y(crimination)-9 890 y Fc(2.1)99 b(Neural)25 b(netw)o(orks)-9 1067 y Fd(There)16 b(are)h(a)g(v)n(ast)h(number)d(of)i(neural)f(netw)o(ork)g (architectures)-9 1167 y(and)22 b(training)g(algorithms)f(contained)g (within)i(the)g(literature.)-9 1266 y(The)15 b(most)h(commonly)e(used)h (architecture)g(for)g(most)h(applica-)-9 1366 y(tions)22 b(is)h(that)g(of)f(a)h(feed-forw)o(ard)c(neural)j(netw)o(ork)f (\(FFNN\),)-9 1466 y(which)i(is)i(still)g(generally)e(trained)g(using)g (some)h(form)f(of)h(the)-9 1565 y(back-propagation)15 b(algorithm.)115 1676 y(The)23 b(main)g(issues)h(when)e(using)h(this)h (approach)c(concern)-9 1776 y(selecting)c(the)g(optimal)f(architecture) g(for)h(the)g(netw)o(ork)f(and)h(in)-9 1876 y(stopping)h(the)h (training)f(\(usually)h(by)g(using)g(data)g(set)h(splitting)-9 1975 y(and)f(stopping)f(training)g(when)h(a)h(v)n(alidation)e(data)i (set)g(error)f(is)-9 2075 y(minimized\).)45 b(The)28 b(use)g(of)f(data)h(set)g(splitting)g(can)f(be)h(seen)-9 2175 y(as)19 b(a)f(disadv)n(antage,)f(since)h(this)h(reduces)f(the)g (amount)f(of)h(data)-9 2274 y(a)n(v)n(ailable)h(for)h(the)g(netw)o(ork) f(to)h(learn)g(the)g(relationships.)115 2385 y(More)26 b(sophisticated)f(approached)f(that)i(do)g(not)g(require)-9 2485 y(hold-out)f(samples)i(are)g(not)g(in)m(v)o(estigated)f(here)g(as) i(the)o(y)f(are)-9 2584 y(lik)o(ely)35 b(to)h(be)g(less)h(accessible)f (to)g(researchers)f(in)h(applied)-9 2684 y(\002elds.)-9 2985 y Fc(2.2)99 b(Discriminant)25 b(analysis)-9 3162 y Fd(Multiple)34 b(discriminant)f(analysis)h(\(MD)m(A\))g(is)i(a)f (statistical)-9 3262 y(technique)21 b(that)j(separates)g(observ)n (ations)d(into)j(tw)o(o)f(or)h(more)-9 3361 y(groups)e(based)i(on)g(se) n(v)o(eral)g(orthogonal)d(linear)j(functions)f(of)-9 3461 y(the)34 b(independent)d(v)n(ariables.)65 b(The)34 b(technique)e(assumes)i(a)-9 3561 y(reasonable)17 b(de)o(gree)g(of)h (multi)n(v)n(ariate)f(normality)-5 b(,)17 b(with)h(logis-)-9 3660 y(tic)25 b(re)o(gression)f(an)i(alternati)n(v)o(e)e(where)g(this)i (is)g(not)f(the)h(case.)-9 3760 y(Ho)n(we)n(v)o(er)d(ordinary)f (logistic)j(re)o(gression)e(is)j(more)e(suited)h(to)-9 3860 y(binary)15 b(outcomes,)h(and)g(will)h(not)g(be)f(discussed)h (further)e(here.)115 3971 y(A)24 b(signi\002cant)f(adv)n(antage)f(of)h (discriminant)f(analysis)i(as)-9 4070 y(a)k(technique)f(is)i(the)f (easy)g(a)n(v)n(ailability)g(of)g(stepwise)g(proce-)-9 4170 y(dures)16 b(for)h(controlling)e(the)i(entry)f(and)h(remo)o(v)n (al)e(of)i(v)n(ariables.)-9 4270 y(By)22 b(w)o(orking)e(with)i(only)f (those)g(necessary)g(v)n(ariables)g(we)h(in-)-9 4369 y(crease)27 b(the)h(chance)e(of)i(the)g(model)e(being)h(able)g(to)h (general-)-9 4469 y(ize)23 b(to)h(ne)n(w)f(sets)h(of)f(data.)35 b(In)23 b(addition,)f(the)i(data)f(collection)-9 4568 y(costs)17 b(can)h(be)f(reduced,)f(sometimes)h(signi\002cantly)-5 b(,)16 b(by)h(w)o(ork-)-9 4668 y(ing)i(with)i(a)f(smaller)h(set)g(of)e (v)n(ariables.)115 4779 y(Another)28 b(adv)n(antage)g(of)h(the)h (technique)e(is)j(that)f(it)g(pro-)-9 4879 y(vides)c(probability)e (information)g(for)i(the)g(predictions,)h(both)-9 4978 y(in)i(terms)h(of)f(the)h(conditional)e(probability)g(of)h(an)g(observ) n(a-)-9 5078 y(tion)23 b(belonging)f(to)i(a)h(particular)d(class)j(gi)n (v)o(en)e(its)i(classi\002ca-)-9 5178 y(tion)18 b(and)g(the)g (conditional)f(probability)f(that)j(a)f(particular)g(ob-)-9 5277 y(serv)n(ation)k(will)i(be)f(classi\002ed)h(as)g(belonging)d(to)i (a)h(particular)-9 5377 y(class)i(gi)n(v)o(en)e(its)j(real)e(class.)42 b(In)25 b(a)h(le)o(gal)f(setting)h(such)f(infor)n(-)-9 5477 y(mation)19 b(w)o(ould)g(certainly)g(be)h(required)f(if)h(softw)o (are)g(forensic)-9 5576 y(results)g(were)g(to)g(be)h(accepted)e(as)i(e) n(vidence.)1939 74 y Fc(2.3)100 b(Case-based)25 b(r)n(easoning)1939 245 y Fd(Case-based)d(reasoning)e(\(CBR\))j(is)g(a)f(method)e(for)h (modeling)1939 344 y(the)29 b(relationship)e(between)h(a)i(series)f(of) f(independent)e(v)n(ari-)1939 444 y(ables)i(and)g(one)f(or)h(more)f (dependent)f(v)n(ariables)h(by)g(storing)1939 543 y(the)21 b(cases)h(\(observ)n(ations\))c(in)j(a)h(database.)k(When)21 b(presented)1939 643 y(with)34 b(a)f(ne)n(w)g(observ)n(ation,)h(the)f (cases)h(that)f(are)g(similar)g(in)1939 743 y(terms)20 b(of)e(the)i(independent)c(v)n(ariables)i(are)i(retrie)n(v)o(ed)d(and)i (the)1939 842 y(dependent)26 b(v)n(ariables)i(calculated)f(from)g(them) h(using)g(some)1939 942 y(form)20 b(of)f(\223a)n(v)o(eraging\224)f (process.)2064 1050 y(CBR)k(has)e(the)g(adv)n(antages)e(of)i(not)g (requiring)e(an)o(y)h(distri-)1939 1149 y(b)n(utional)c(assumptions)g Fb(per)i(se)f Fd(b)n(ut)g(does)g(require)f(the)h(speci\002-)1939 1249 y(cation)f(of)h(a)g(distance)f(metric)g(\(for)g(\002nding)f(the)i (closest)g(e)o(x)o(em-)1939 1348 y(plars)i(to)h(the)f(presented)f(case) h(and)g(calculating)f(their)g(similar)n(-)1939 1448 y(ity\).)25 b(Scaling)20 b(\(if)g(an)o(y)f(is)i(used\))e(when)g(measuring)g (similarity)1939 1548 y(can)24 b(be)h(based)f(on)f(ranges)h(or)g (standardized)e(v)n(alues)i(if)h(some)1939 1647 y(distrib)n(utional)f (assumptions)h(are)g(made.)39 b(The)25 b(other)g(aspect)1939 1747 y(that)d(requires)f(some)h(thought)e(is)j(the)f(selection)g(of)g (a)g(method)1939 1847 y(for)g(combining)e(the)i(cases.)31 b(Again,)22 b(a)h(simple)f(weighted)f(a)n(v-)1939 1946 y(erage)16 b(approach)d(can)j(be)g(used)f(once)g(the)h(distance)g (metric)f(has)1939 2046 y(been)22 b(decided)f(on,)i(with)f(perhaps)f (some)i(po)n(wer)e(of)h(distance)1939 2145 y(used)16 b(to)g(increase)f(the)h(in\003uence)f(of)h(closer)f(observ)n(ations)f (and)1939 2245 y(reduce)33 b(the)h(in\003uence)f(of)h(outliers.)66 b(In)33 b(most)h(implemen-)1939 2345 y(tations)27 b(a)g(threshold)e(of) h(similarity)h(or)f(a)h(limit)g(of)f(\223related\224)1939 2444 y(cases)f(is)g(used)e(to)h(pre)n(v)o(ent)f(all)h(stored)f(cases)i (in\003uencing)d(all)1939 2544 y(predictions.)2064 2652 y(One)16 b(particular)f(case-based)h(reasoning)e(system)j(that)f(has) 1939 2751 y(been)34 b(pre)n(viously)f(used)i(for)f(softw)o(are)g (metric)h(research)f(is)1939 2851 y(the)g(ANGEL)f(system)g([5)o(].)65 b(ANGEL)33 b(has)h(also)f(been)g(im-)1939 2951 y(plemented)20 b(as)h(part)g(of)f(the)h(IDENTIFIED)f(system)h(that)g(w)o(as)1939 3050 y(used)j(in)g(this)h(paper)e(for)g(the)h(measurement)e(e)o (xtraction,)h(and)1939 3150 y(CBR)32 b(and)e(FFNN)h(models)f([1)o(,)h (4)o(].)55 b(The)30 b(ANGEL)g(system)1939 3249 y(also)22 b(allo)n(ws)f(for)g(the)g(automatic)f(selection)h(of)g(rele)n(v)n(ant)f (v)n(ari-)1939 3349 y(ables)38 b(\(at)g(some)f(considerable)f (computational)f(cost\),)41 b(al-)1939 3449 y(though)19 b(here)h(no)f(attempt)h(will)h(be)g(made)e(to)i(select)g(an)o(y)e (opti-)1939 3548 y(mal)i(subset)f(of)g(v)n(ariables)f(when)h(using)g (this)g(technique.)1939 3873 y Fe(3)120 b(A)-6 b(uthorship)31 b(data)f(set)1939 4074 y Fd(The)16 b(data)g(that)g(we)g(ha)n(v)o(e)f (chosen)g(to)h(illustrate)g(the)g(author)f(dis-)1939 4173 y(crimination)26 b(problem)g(e)o(xhibits)h(man)o(y)f(of)h(the)h (characteris-)1939 4273 y(tics)18 b(that)e(present)g(some)h(of)f(the)g (most)h(perple)o(xing)d(dif)n(\002culties)1939 4373 y(found)23 b(when)i(undertaking)c(such)k(analyses.)38 b(The)25 b(data)g(con-)1939 4472 y(tains)32 b(programs)d(from)h(se)n(v)o(en)h(authors)f(with)h (widely)g(v)n(ary-)1939 4572 y(ing)22 b(amounts)f(of)h(data)h(and)e (from)g(three)h(basic)h(source)e(types.)1939 4671 y(26)f(measures)f (were)h(e)o(xtracted)e(for)h(each)h(program)d(using)j(the)1939 4771 y(IDENTIFIED)f(tool)h(\(T)-7 b(able)20 b(1\).)2064 4879 y(All)31 b(programs)e(were)i(written)f(in)h(standard)f(C++.)57 b(The)1939 4978 y(source)52 b(code)h(for)f(authors)g(one,)60 b(tw)o(o,)h(and)52 b(three)h(are)1939 5078 y(from)31 b(programming)d(books;)37 b(authors)30 b(four)m(,)j(\002)n(v)o(e,)h (and)d(six)1939 5178 y(are)c(e)o(xperienced)d(commercial)h (programmers;)j(and)e(author)1939 5277 y(se)n(v)o(en')-5 b(s)28 b(code)g(is)h(from)f(e)o(xamples)f(pro)o(vided)f(with)j(a)f (popu-)1939 5377 y(lar)23 b(C++)g(compiler)-5 b(.)31 b(The)23 b(choice)e(of)i(program)d(sources)i(may)1939 5477 y(appear)i(unusual,)g(b)n(ut)h(it)g(w)o(as)h(felt)f(that)g(the)f (usual)h(source)f(of)1939 5576 y(student)c(programs)e(w)o(as)j(no)f (more)f(realistic.)p eop %%Page: 3 3 3 2 bop 554 -6 2653 4 v 552 93 4 100 v 604 64 a Fd(Measurement)p 1164 93 V 157 w(Description)p 3205 93 V 554 97 2653 4 v 552 196 4 100 v 604 167 a(WHITE)p 1164 196 V 343 w(Proportion)18 b(of)i(lines)h(that)f(are)g(blank)p 3205 196 V 552 296 V 604 266 a(SP)-8 b(A)m(CE-1)p 1164 296 V 294 w(Proportion)18 b(of)i(operators)f(with)h(whitespace)g(on)g(both)f(sides)p 3205 296 V 552 396 V 604 366 a(SP)-8 b(A)m(CE-2)p 1164 396 V 294 w(Proportion)18 b(of)i(operators)f(with)h(whitespace)g(on)g (left)g(side)p 3205 396 V 552 495 V 604 465 a(SP)-8 b(A)m(CE-3)p 1164 495 V 294 w(Proportion)18 b(of)i(operators)f(with)h(whitespace)g (on)g(right)f(side)p 3205 495 V 552 595 V 604 565 a(SP)-8 b(A)m(CE-4)p 1164 595 V 294 w(Proportion)18 b(of)i(operators)f(with)h (whitespace)g(on)g(neither)f(side)p 3205 595 V 552 695 V 604 665 a(LOCCHARS)p 1164 695 V 169 w(Mean)h(number)f(of)g (characters)h(per)f(line)p 3205 695 V 552 794 V 604 764 a(CAPS)p 1164 794 V 404 w(Proportion)f(of)i(letters)h(that)f(are)g (upper)f(case)p 3205 794 V 552 894 V 604 864 a(LOC)p 1164 894 V 445 w(Non-whitespace)g(lines)h(of)g(code)p 3205 894 V 552 993 V 604 964 a(DB)o(UGSYM)p 1164 993 V 197 w(Deb)n(ug)g(v)n(ariables)f(per)h(line)g(of)g(code)g(\(LOC\))p 3205 993 V 552 1093 V 604 1063 a(DB)o(UGPRN)p 1164 1093 V 216 w(Commented)f(out)h(deb)n(ug)f(print)g(statements)i(per)e(LOC)p 3205 1093 V 552 1193 V 604 1163 a(COM)p 1164 1193 V 422 w(Proportion)f(of)i(LOC)h(that)f(are)g(purely)f(comment)p 3205 1193 V 552 1292 V 604 1262 a(INLCOM)p 1164 1292 V 283 w(Proportion)f(of)i(LOC)h(that)f(ha)n(v)o(e)g(inline)g(comments)p 3205 1292 V 552 1392 V 604 1362 a(ENDCOM)p 1164 1392 V 251 w(Proportion)e(of)i(end-of-block)d(braces)j(labelled)f(with)i (comments)p 3205 1392 V 552 1492 V 604 1462 a(GO)m(T)o(O)p 1164 1492 V 384 w(Gotos)f(per)g(non-comment)d(LOC)k(\(NCLOC\))p 3205 1492 V 552 1591 V 604 1561 a(COND-1)p 1164 1591 V 306 w(Number)e(of)h(#if)g(per)g(NCLOC)p 3205 1591 V 552 1691 V 604 1661 a(COND-2)p 1164 1691 V 306 w(Number)f(of)h(#elif)g (per)g(NCLOC)p 3205 1691 V 552 1790 V 604 1761 a(COND-3)p 1164 1790 V 306 w(Number)f(of)h(#ifdef)f(per)h(NCLOC)p 3205 1790 V 552 1890 V 604 1860 a(COND-4)p 1164 1890 V 306 w(Number)f(of)h(#ifndef)f(per)g(NCLOC)p 3205 1890 V 552 1990 V 604 1960 a(COND-5)p 1164 1990 V 306 w(Number)g(of)h(#else) g(per)g(NCLOC)p 3205 1990 V 552 2089 V 604 2059 a(COND-6)p 1164 2089 V 306 w(Number)f(of)h(#endif)f(per)h(NCLOC)p 3205 2089 V 552 2189 V 604 2159 a(COND)p 1164 2189 V 376 w(Conditional)f(compilation)g(k)o(e)o(yw)o(ords)f(per)i(NCLOC)p 3205 2189 V 552 2289 V 604 2259 a(CCN)p 1164 2289 V 441 w(McCabe')-5 b(s)21 b(c)o(yclomatic)e(comple)o(xity)f(number)p 3205 2289 V 552 2388 V 604 2358 a(DEC-IF)p 1164 2388 V 343 w(if)j(statements)f(per)g(NCLOC)p 3205 2388 V 552 2488 V 604 2458 a(DEC-SWITCH)p 1164 2488 V 99 w(switch)h(statements)f (per)g(NCLOC)p 3205 2488 V 552 2587 V 604 2558 a(DEC-WHILE)p 1164 2587 V 149 w(while)h(statements)f(per)g(NCLOC)p 3205 2587 V 552 2687 V 604 2657 a(DEC)p 1164 2687 V 445 w(Decision)g(statements)h(per)e(NCLOC)p 3205 2687 V 554 2690 2653 4 v 1371 2838 a(T)-7 b(able)21 b(1:)k(The)20 b(26)g(v)n(ariables)f(used)115 3163 y(F)o(or)g(the)g(purposes)f(of)h (testing)g(the)h(v)n(arious)e(models)g(to)i(be)-9 3262 y(de)n(v)o(eloped)i(in)j Fa(x)p Fd(4.1,)g(4.2,)h(and)e(4.3,)i(the)f(a)n (v)n(ailable)f(data)h(w)o(as)-9 3362 y(split)17 b(\(as)g(sho)n(wn)f(in) h(T)-7 b(able)17 b(2\))f(with)h(strati\002cation)g(\(as)g(equally)-9 3462 y(as)38 b(possible\))f(across)h(authors.)76 b(The)37 b(split)h(w)o(as)h(approxi-)-9 3561 y(mately)23 b(25\045)h(in)g(the)g (T)m(raining)f(1)h(set,)i(25\045)e(in)g(the)g(T)m(raining)-9 3661 y(2)c(set,)h(and)e(50\045)h(in)g(the)h(T)-6 b(esting)20 b(set.)115 3826 y(In)k(some)g(cases,)h(especially)e(for)h(authors)f(4)h (and)f(5,)i(v)o(ery)-9 3925 y(little)32 b(data)g(is)g(a)n(v)n(ailable,) i(b)n(ut)e(this)g(can)g(be)g(seen)f(as)i(a)f(use-)-9 4025 y(ful)27 b(test)i(of)f(a)h(situation)e(certain)h(to)g(arise)g(in)g (practice.)48 b(The)-9 4125 y(only)30 b(concern)g(here)i(is)g(that)g (the)g(prior)e(probabilities)h(from)-9 4224 y(the)21 b(T)m(raining)e(set)j(match)f(the)g(posterior)f(probabilities)g(in)h (the)-9 4324 y(T)-6 b(esting)25 b(set.)41 b(In)25 b(a)g (simulation-based)f(study)g(the)i(use)f(of)g(re-)-9 4424 y(sampling)34 b(w)o(ould)g(appear)h(a)g(better)g(choice)g(to)g(assess)i (the)-9 4523 y(techniques.)73 b(Ho)n(we)n(v)o(er)35 b(since)i(this)h (study)e(in)m(v)n(olv)o(es)g(only)1939 3163 y(one)24 b(split)g(of)g(the)g(data)g(set,)i(the)e(use)g(of)g(strati\002cation)f (seems)1939 3262 y(preferable)e(to)h(the)g(increased)g(ef)n(fects)f(of) h(chance)g(bought)e(on)1939 3362 y(by)g(resampling.)1939 3657 y Fe(4)120 b(Results)1939 3864 y Fc(4.1)100 b(Neural)25 b(netw)o(ork)1939 4025 y Fd(The)k(ultimately)f(selected)h(FFNN)h(w)o (as)g(a)f(26-9-7)e(netw)o(ork,)1939 4125 y(with)17 b(the)f(logistic)h (transfer)e(for)h(both)f(hidden)g(and)h(output)f(lay-)1939 4224 y(ers.)25 b(The)18 b(best)g(netw)o(ork)f(found)g(w)o(as)i(trained) e(for)g(250)h(epochs)1939 4324 y(using)i(the)g(backpropagation)c (algorithm)j(\(learning)f(rate)j(0.2,)1939 4424 y(momentum)h(0.9\).)36 b(All)25 b(26)e(v)n(ariables)h(pro)o(vided)d(were)j(used.)1939 4523 y(Half)i(of)f(the)g(training)f(data)h(\(T)m(raining)e(1\))i(w)o (as)h(used)f(for)g(the)p 702 4719 2357 4 v 700 4819 4 100 v 1510 4819 V 1510 4819 V 2033 4789 a(Author)p 2788 4819 V 3057 4819 V 700 4919 V 752 4889 a(Data)c(set)p 1510 4919 V 1510 4919 V 581 w(1)182 b(2)141 b(3)f(4)99 b(5)141 b(6)g(7)p 2788 4919 V 99 w(T)-7 b(otal)p 3057 4919 V 702 4922 2357 4 v 700 5022 4 100 v 752 4992 a(T)m(raining)19 b(1)p 1510 5022 V 463 w(17)141 b(29)f(7)g(3)99 b(1)h(11)e(21)p 2788 5022 V 185 w(89)p 3057 5022 V 700 5121 V 752 5091 a(T)m(raining)19 b(2/V)-9 b(alidation)p 1510 5121 V 97 w(17)141 b(28)f(6)g(3)99 b(2)h(10)e(21)p 2788 5121 V 185 w(87)p 3057 5121 V 700 5221 V 752 5191 a(T)-6 b(esting)p 1510 5221 V 565 w(34)141 b(57)98 b(13)140 b(6)99 b(2)h(21)e(42)p 2788 5221 V 143 w(175)p 3057 5221 V 702 5224 2357 4 v 700 5324 4 100 v 752 5294 a(T)-7 b(otal)p 1510 5324 V 640 w(68)99 b(114)f(26)h(12)f(5)i(42)e(84)p 2788 5324 V 143 w(351)p 3057 5324 V 702 5327 2357 4 v 1501 5475 a(T)-7 b(able)20 b(2:)25 b(Data)c(set)g(splits)p eop %%Page: 4 4 4 3 bop -9 74 a Fd(actual)30 b(training,)i(while)f(the)g(remainder)e (\(T)m(raining)g(2\))i(w)o(as)-9 173 y(used)19 b(to)i(stop)f(training)f (and)h(select)g(the)h(best)f(architecture.)115 280 y(T)-7 b(able)37 b(3)h(sho)n(ws)f(the)h(confusion)d(matrix)i(for)g(the)g(net-) -9 380 y(w)o(ork')-5 b(s)25 b(predictions)f(on)i(the)g(testing)g(set.) 42 b(Those)26 b(programs)-9 479 y(that)16 b(were)g(correctly)e (classi\002ed)j(are)f(sho)n(wn)f(as)i(box)o(ed)d(entries)-9 579 y(on)23 b(the)h(main)f(diagonal.)35 b(As)25 b(can)e(be)h(seen)g (the)g(netw)o(ork)f(has)-9 678 y(a)30 b(high)g(classi\002cation)g(rate) h(of)f(81.1\045.)54 b(Authors)30 b(tw)o(o)g(and)-9 778 y(three)e(are)h(ob)o(viously)e(distinct)i(from)f(all)h(others,)i(while) e(the)-9 878 y(small)k(amount)e(of)h(data)h(a)n(v)n(ailable)f(for)g (author)f(\002)n(v)o(e)h(seems)-9 977 y(lik)o(ely)21 b(to)h(be)f(responsible)f(for)h(all)h(of)g(those)f(programs)f(being)-9 1077 y(misclassi\002ed.)115 1184 y(Since)i(this)h(technique)d(w)o(as)j (the)f(only)f(one)h(that)g(required)-9 1283 y(splitting)d(the)h (training)f(data,)g(all)i(other)e(techniques)f(were)i(de-)-9 1383 y(v)o(eloped)d(using)i(both)f(training)h(data)g(sets)h(\(T)m (raining)e(1)h(and)g(2\))-9 1483 y(and)27 b(just)h(the)f(\002rst)h (50\045)g(\(T)m(raining)d(1\).)47 b(The)27 b(other)g(model-)-9 1582 y(ing)18 b(techniques)g(when)g(tuned)g(using)h(both)f(training)g (data)h(sets)-9 1682 y(could)d(be)h(e)o(xpected)e(to)i(enjo)o(y)g(an)g (adv)n(antage)e(o)o(v)o(er)h(the)h(neural)-9 1781 y(netw)o(ork)g(model) h(in)g(terms)h(of)f(the)h(greater)e(number)m(,)g(and)h(thus)-9 1881 y(richness,)27 b(of)f(cases)h(a)n(v)n(ailable.)43 b(While)27 b(in)f(the)h(second)e(case)-9 1981 y(the)31 b(neural)f(netw)o(ork)g(models)h(should)f(ha)n(v)o(e)h(an)g(adv)n (antage)-9 2080 y(since)21 b(the)o(y)g(are)g(tuned)g(on)g(the)g(same)h (data)f(set)h(whilst)g(ha)n(ving)-9 2180 y(their)h(generalisability)e (encouraged)g(by)i(the)g(use)h(of)f(the)g(v)n(al-)-9 2280 y(idation)i(set.)42 b Fa(x)p Fd(4.4)26 b(sho)n(ws)f(the)h (performance)d(of)j(all)g(models)-9 2379 y(on)19 b(all)i(\(sub\)sets)f (of)g(data.)-9 2656 y Fc(4.2)99 b(Multiple)25 b(discriminant)g (analysis)-9 2825 y Fd(The)30 b(MD)m(A)h(w)o(as)h(a)f(stepwise)g(MD)m (A)g(\(W)m(ilk')-5 b(s)31 b(lambda)f(w)o(as)-9 2924 y(used)20 b(for)f(entry)h(and)f(e)o(xit)h(of)g(v)n(ariables\).)k(Prior)c (probabilities)-9 3024 y(were)27 b(obtained)f(from)g(the)i(data)f(and)g (within)h(group)d(co)o(v)n(ari-)-9 3124 y(ance)17 b(matrices)h(were)h (used.)24 b(As)19 b(discussed)f(in)g Fa(x)p Fd(4.1)f(both)h(sets)-9 3223 y(of)23 b(training)f(data)h(were)h(used)f(as)h(part)f(of)g(the)h (model)e(param-)-9 3323 y(eter)g(tuning)g(since)h(no)g(model)f (selection)g(process)h(w)o(as)g(used.)-9 3423 y(Another)f(model)i(w)o (as)h(de)n(v)o(eloped)c(using)j(only)f(the)h(T)m(raining)-9 3522 y(1)c(data)g(set)h(\(50\045)f(of)g(the)g(training)g(data\).)25 b(See)20 b Fa(x)p Fd(4.4)g(for)g(these)-9 3622 y(results.)115 3729 y(T)-7 b(able)37 b(4)g(sho)n(ws)g(the)g(confusion)d(matrix)j(for)f (the)h(pre-)-9 3828 y(dictions)29 b(made)g(on)g(the)g(with-held)g (testing)g(data.)53 b(As)31 b(with)-9 3928 y(the)24 b(neural)f(netw)o (ork)g(model)g(the)h(performance)d(accurac)o(y)i(is)-9 4027 y(81.1\045)31 b(when)h(using)g(all)h(training)f(data.)62 b(The)32 b(patterns)g(of)-9 4127 y(confusion)c(are)i(similar)g(for)g (authors)f(four)m(,)i(six,)i(and)d(se)n(v)o(en)-9 4227 y(b)n(ut)20 b(rather)f(dif)n(ferent)g(for)g(the)h(other)g(authors.)-9 4503 y Fc(4.3)99 b(Case-based)25 b(r)n(easoning)-9 4672 y Fd(The)h(case-based)g(reasoning)g(model)g(w)o(as)i(de)n(v)o(eloped)c (using)-9 4772 y(the)g(ANGEL)h(algorithm,)f(with)g(5)h(analogies)f(and) g(weighted)-9 4872 y(means)32 b(for)h(case)g(aggre)o(gation.)61 b(T)m(ie)33 b(resolution)f(w)o(as)i(also)-9 4971 y(used.)39 b(All)26 b(v)n(ariables)f(were)g(normalized)e(in)i(order)f(to)i(main-) -9 5071 y(tain)20 b(a)g(comparable)f(scale.)115 5178 y(All)i(26)g(v)n(ariables)e(were)i(used,)f(with)h(tw)o(o)g(models)f(de) n(v)o(el-)-9 5277 y(oped)h(\226)h(one)g(using)g(only)f(50\045)h(of)g (the)h(training)e(data)h(\(T)m(rain-)-9 5377 y(ing)29 b(1\))h(and)f(another)g(using)g(all)i(training)e(data)h(\(T)m(raining)e (1)-9 5477 y(and)20 b(2\).)27 b(See)21 b Fa(x)p Fd(4.4)f(for)h(a)g (discussion)f(of)h(the)g(performance)d(of)-9 5576 y(this)i (reduced-data)e(model.)2064 74 y(T)-7 b(able)26 b(5)g(sho)n(ws)g(the)f (confusion)f(matrix)i(for)f(the)h(testing)1939 173 y(data)g(set.)41 b(There)25 b(is)i(a)e(considerably)f(higher)g(le)n(v)o(el)h(of)g(accu-) 1939 273 y(rac)o(y)g(compared)e(to)j(the)f(neural)g(netw)o(ork)f(and)h (discriminant)1939 372 y(analysis)k(models,)g(with)g(88.0\045)e (accurac)o(y)g(achie)n(v)o(ed)f(when)1939 472 y(using)20 b(all)h(training)e(data.)1939 727 y Fc(4.4)100 b(Comparison)1939 888 y Fd(T)-7 b(able)20 b(6)g(sho)n(ws)g(the)g(results)g(for)f(all)i (\002)n(v)o(e)e(models)g(de)n(v)o(eloped.)1939 988 y(Note)24 b(that)g(the)h(\223training)d(set\224)j(errors)e(for)g(the)i(CBR)g (models)1939 1087 y(are)e(lea)n(v)o(e-one-out)d(since)j(the)g(case)g (to)g(be)g(predicted)f(should)1939 1187 y(ob)o(viously)30 b(not)h(be)h(in)f(the)h(training)e(set.)60 b(As)33 b(can)e(be)h(seen) 1939 1287 y(the)f(results)h(for)e(the)h(FFNN)h(and)f(MD)m(A)g(models)f (are)h(quite)1939 1386 y(remarkably)g(almost)i(identical)g(\(the)g (FFNN)h(and)f(full-data)1939 1486 y(MD)m(A)h(are)f(in)h(f)o(act)g (identical\).)64 b(Ho)n(we)n(v)o(er)m(,)35 b(each)e(of)g(these)1939 1585 y(models)27 b(made)f(rather)g(dif)n(ferent)f(patterns)h(of)g (confusion)f(on)1939 1685 y(all)c(data)f(sets.)2064 1788 y(The)k(best)g(performing)e(technique)g(in)j(all)f(cases)h(is)h(case-) 1939 1887 y(based)19 b(reasoning.)k(In)c(terms)g(of)g(predicti)n(v)o(e) f(performance)e(on)1939 1987 y(the)32 b(test)h(data)e(set,)k(its)e (predictions)d(were)h(almost)h(7\045)g(bet-)1939 2087 y(ter)38 b(which)e(appears)g(to)h(be)g(a)h(useful)e(increase)h(in)g (perfor)n(-)1939 2186 y(mance.)63 b(Ev)o(en)31 b(with)i(the)g(reduced)f (training)f(data)i(set,)k(the)1939 2286 y(case-based)e(reasoning)f (model)g(outperformed)e(the)k(neural)1939 2386 y(netw)o(ork)19 b(model)h(by)f(5.2\045.)2064 2488 y(This)k(is)h(suspected)e(to)h(be)g (a)h(result)f(of)g(the)g(f)o(act)g(that)g(pro-)1939 2588 y(grammers)32 b(ha)n(v)o(e)g(more)g(than)g(one)g(style)i(of)e (programming)1939 2687 y(leading)15 b(to)h(se)n(v)o(eral)f (multi-dimensional)e(\223clouds\224)i(of)g(points.)1939 2787 y(Some)22 b(sets)h(of)e(programs)f(for)i(a)g(gi)n(v)o(en)e (programmer)f(are)j(ap-)1939 2887 y(parently)31 b(within)i(other)e (programmer')-5 b(s)30 b(\223clouds\224)i(of)g(met-)1939 2986 y(rics,)44 b(pre)n(v)o(enting)36 b(simple)i(e)o(xplicit)g (classi\002cation)h(bound-)1939 3086 y(aries)21 b(from)e(properly)f (classifying)i(the)g(systems.)1939 3383 y Fe(5)120 b(Conclusions)1939 3574 y Fd(The)34 b(use)h(of)f(the)g(proposed)e(set)j(of)f(metrics)h (for)e(discrimi-)1939 3674 y(nating)22 b(between)f(se)n(v)o(en)h (authors)f(sho)n(ws)h(promising)f(results,)1939 3774 y(especially)31 b(when)f(using)h(the)g(case-based)f(reasoning)f(tech-) 1939 3873 y(nique.)47 b(All)29 b(techniques)d(ho)n(we)n(v)o(er)g(pro)o (vided)f(accurac)o(y)h(be-)1939 3973 y(tween)21 b(81.1\045)e(and)h (88.0\045)g(on)g(a)h(holdout)e(testing)h(set)h(w)o(ould)1939 4073 y(be)c(certainly)f(encouraging)d(for)j(the)h(softw)o(are)f (forensics)g(\002eld)1939 4172 y(as)21 b(a)g(whole.)2064 4275 y(It)50 b(is)g(tentati)n(v)o(ely)e(suggested)g(here)h(that)g(the)h (nature)1939 4374 y(of)34 b(class)i(boundaries)c(for)i(forensic)f (applications)g(is)i(more)1939 4474 y(amenable)20 b(to)h(modeling)e (using)i(case-based)f(reasoning)f(than)1939 4574 y(partitioning)25 b(approaches.)43 b(The)26 b(idea)h(of)f(multiple)g(clusters)1939 4673 y(suggests)18 b(that)g(other)g(neural)f(netw)o(ork)f (architectures)h(such)h(as)1939 4773 y(v)n(ariants)i(of)g(L)-8 b(VQ)20 b(could)f(be)h(fruitfully)f(applied)g(here.)2064 4876 y(W)-7 b(e)29 b(are)g(no)n(w)e(comparing)f(the)i(performance)e(of) i(dif)n(fer)n(-)1939 4975 y(ent)20 b(sets)h(of)f(forensic)f(metrics,)g (both)g(structural)g(and)h(stylistic)1939 5075 y(to)29 b(determine)e(which)h(are)g(the)g(most)g(useful)g(in)h(certain)f(cir)n (-)1939 5175 y(cumstances.)54 b(Since)30 b(stylistic)h(metrics)f(are)g (easier)h(to)f(f)o(ak)o(e)1939 5274 y(than)d(structural,)h(the)f (ability)g(of)g(the)g(latter)g(to)h(discriminate)1939 5374 y(authorship)19 b(is)i(more)e(useful.)2064 5477 y(Another)h(area)i(of)g(interest)f(is)i(ho)n(w)e(each)h(technique)e (per)n(-)1939 5576 y(forms)i(gi)n(v)o(en)e(certain)i(quantities)g(of)f (data.)31 b(Whilst)23 b(the)f(CBR)p eop %%Page: 5 5 5 4 bop 662 221 2437 4 v 660 320 4 100 v 1185 320 V 1598 290 a Fd(Predicted)19 b(author)g(number)p 2829 320 V 3097 320 V 660 420 V 1185 420 V 1334 390 a(1)197 b(2)g(3)183 b(4)f(5)197 b(6)g(7)p 2829 420 V 99 w(T)-7 b(otal)p 3097 420 V 662 423 2437 4 v 660 538 4 115 v 712 715 a(Actual)712 815 y(author)712 914 y(number)1095 508 y(1)p 1185 538 V 1236 426 140 4 v 1236 534 4 108 v 127 w(20)p 1372 534 V 1236 537 140 4 v 225 w(1)197 b(6)183 b(1)421 b(1)197 b(5)p 2829 538 4 115 v 185 w(34)p 3097 538 V 660 653 V 1095 623 a(2)p 1185 653 V 1475 541 140 4 v 1475 649 4 108 v 367 w(57)p 1611 649 V 1475 652 140 4 v 2829 653 4 115 v 1378 w(57)p 3097 653 V 660 767 V 1095 737 a(3)p 1185 767 V 1714 656 140 4 v 1714 763 4 108 v 606 w(13)p 1851 763 V 1714 766 140 4 v 2829 767 4 115 v 1139 w(13)p 3097 767 V 660 882 V 1095 852 a(4)p 1185 882 V 436 w(2)p 1980 771 98 4 v 1980 877 4 107 v 393 w(4)p 2075 877 V 1980 880 98 4 v 2829 882 4 115 v 957 w(6)p 3097 882 V 660 997 V 1095 967 a(5)p 1185 997 V 436 w(2)p 2204 885 98 4 v 2204 993 4 108 v 617 w(0)p 2299 993 V 2204 996 98 4 v 2829 997 4 115 v 733 w(2)p 3097 997 V 660 1112 V 1095 1082 a(6)p 1185 1112 V 197 w(1)g(2)g(1)p 2402 1000 140 4 v 2402 1107 4 108 v 576 w(17)p 2538 1107 V 2402 1110 140 4 v 2829 1112 4 115 v 452 w(21)p 3097 1112 V 660 1226 V 1095 1196 a(7)p 1185 1226 V 197 w(4)g(3)g(4)p 2641 1115 140 4 v 2641 1222 4 108 v 815 w(31)p 2777 1222 V 2641 1225 140 4 v 2829 1226 4 115 v 213 w(42)p 3097 1226 V 662 1230 2437 4 v 660 1329 4 100 v 968 1299 a(T)-7 b(otal)p 1185 1329 V 156 w(25)155 b(67)g(24)182 b(5)g(0)155 b(18)g(36)p 2829 1329 V 143 w(175)p 3097 1329 V 662 1333 2437 4 v 284 1480 a(T)-7 b(able)20 b(3:)26 b(Confusion)18 b(matrix)i(for)g(testing)g(data)g(predictions)e(from)i(FFNN)h(model)e (using)h(all)g(training)f(data)p 662 2105 V 660 2204 4 100 v 1185 2204 V 1598 2174 a(Predicted)g(author)g(number)p 2829 2204 V 3097 2204 V 660 2304 V 1185 2304 V 1334 2274 a(1)197 b(2)g(3)183 b(4)f(5)197 b(6)g(7)p 2829 2304 V 99 w(T)-7 b(otal)p 3097 2304 V 662 2307 2437 4 v 660 2422 4 115 v 712 2599 a(Actual)712 2699 y(author)712 2798 y(number)1095 2392 y(1)p 1185 2422 V 1236 2310 140 4 v 1236 2418 4 108 v 127 w(26)p 1372 2418 V 1236 2421 140 4 v 225 w(1)646 b(3)197 b(1)g(3)p 2829 2422 4 115 v 185 w(34)p 3097 2422 V 660 2537 V 1095 2507 a(2)p 1185 2537 V 197 w(2)p 1475 2425 140 4 v 1475 2533 4 108 v 128 w(52)p 1611 2533 V 1475 2536 140 4 v 449 w(1)421 b(2)p 2829 2537 4 115 v 424 w(57)p 3097 2537 V 660 2651 V 1095 2621 a(3)p 1185 2651 V 197 w(1)197 b(2)p 1714 2540 140 4 v 1714 2647 4 108 v 128 w(10)p 1851 2647 V 1714 2650 140 4 v 2829 2651 4 115 v 1139 w(13)p 3097 2651 V 660 2766 V 1095 2736 a(4)p 1185 2766 V 436 w(2)p 1980 2655 98 4 v 1980 2761 4 107 v 393 w(4)p 2075 2761 V 1980 2764 98 4 v 2829 2766 4 115 v 957 w(6)p 3097 2766 V 660 2881 V 1095 2851 a(5)p 1185 2881 V 900 w(1)p 2204 2769 98 4 v 2204 2877 4 108 v 153 w(0)p 2299 2877 V 2204 2880 98 4 v 465 w(1)p 2829 2881 4 115 v 226 w(2)p 3097 2881 V 660 2996 V 1095 2966 a(6)p 1185 2996 V 197 w(2)g(2)g(1)p 2402 2884 140 4 v 2402 2992 4 108 v 576 w(16)p 2538 2992 V 2402 2995 140 4 v 2829 2996 4 115 v 452 w(21)p 3097 2996 V 660 3110 V 1095 3080 a(7)p 1185 3110 V 197 w(3)g(3)g(2)p 2641 2999 140 4 v 2641 3106 4 108 v 815 w(34)p 2777 3106 V 2641 3109 140 4 v 2829 3110 4 115 v 213 w(42)p 3097 3110 V 662 3114 2437 4 v 660 3213 4 100 v 968 3183 a(T)-7 b(otal)p 1185 3213 V 156 w(34)155 b(62)g(13)182 b(6)g(3)155 b(19)g(38)p 2829 3213 V 143 w(175)p 3097 3213 V 662 3217 2437 4 v 295 3364 a(T)-7 b(able)20 b(4:)26 b(Confusion)18 b(matrix)i(for)f(testing)i(data)f(predictions)e(from)i(MD)m(A)g(model)f (using)h(all)g(training)f(data)p 662 3989 V 660 4088 4 100 v 1185 4088 V 1598 4058 a(Predicted)g(author)g(number)p 2829 4088 V 3097 4088 V 660 4188 V 1185 4188 V 1334 4158 a(1)197 b(2)g(3)183 b(4)f(5)197 b(6)g(7)p 2829 4188 V 99 w(T)-7 b(otal)p 3097 4188 V 662 4191 2437 4 v 660 4306 4 115 v 712 4483 a(Actual)712 4583 y(author)712 4682 y(number)1095 4276 y(1)p 1185 4306 V 1236 4194 140 4 v 1236 4302 4 108 v 127 w(28)p 1372 4302 V 1236 4305 140 4 v 225 w(1)197 b(2)885 b(3)p 2829 4306 4 115 v 185 w(34)p 3097 4306 V 660 4421 V 1095 4391 a(2)p 1185 4421 V 1475 4309 140 4 v 1475 4417 4 108 v 367 w(57)p 1611 4417 V 1475 4420 140 4 v 2829 4421 4 115 v 1378 w(57)p 3097 4421 V 660 4535 V 1095 4505 a(3)p 1185 4535 V 1714 4424 140 4 v 1714 4531 4 108 v 606 w(13)p 1851 4531 V 1714 4534 140 4 v 2829 4535 4 115 v 1139 w(13)p 3097 4535 V 660 4650 V 1095 4620 a(4)p 1185 4650 V 436 w(2)p 1980 4539 98 4 v 1980 4645 4 107 v 393 w(4)p 2075 4645 V 1980 4648 98 4 v 2829 4650 4 115 v 957 w(6)p 3097 4650 V 660 4765 V 1095 4735 a(5)p 1185 4765 V 436 w(1)p 2204 4653 98 4 v 2204 4760 4 107 v 617 w(1)p 2299 4760 V 2204 4763 98 4 v 2829 4765 4 115 v 733 w(2)p 3097 4765 V 660 4880 V 1095 4850 a(6)p 1185 4880 V 436 w(5)p 2402 4768 140 4 v 2402 4876 4 108 v 815 w(16)p 2538 4876 V 2402 4879 140 4 v 2829 4880 4 115 v 452 w(21)p 3097 4880 V 660 4994 V 1095 4964 a(7)p 1185 4994 V 197 w(1)197 b(5)g(2)183 b(2)p 2641 4883 140 4 v 2641 4990 4 108 v 590 w(32)p 2777 4990 V 2641 4993 140 4 v 2829 4994 4 115 v 213 w(42)p 3097 4994 V 662 4998 2437 4 v 660 5097 4 100 v 968 5067 a(T)-7 b(otal)p 1185 5097 V 156 w(29)155 b(71)g(17)182 b(6)g(1)155 b(16)g(35)p 2829 5097 V 143 w(175)p 3097 5097 V 662 5101 2437 4 v 318 5248 a(T)-7 b(able)20 b(5:)25 b(Confusion)19 b(matrix)g(for)h(testing)g(data)g(predictions)f(from)g (CBR)j(model)d(using)h(all)h(training)e(data)p eop %%Page: 6 6 6 5 bop 409 -6 2942 4 v 407 93 4 100 v 459 64 a Fd(Model)p 1469 93 V 843 w(T)m(raining)19 b(1)p 1914 93 V 99 w(T)m(raining)g(2)p 2359 93 V 99 w(T)m(raining)g(1)h(and)g(2)p 3007 93 V 99 w(T)-6 b(esting)p 3350 93 V 409 97 2942 4 v 407 196 4 100 v 459 167 a(MD)m(A)20 b(\(using)g(50\045)g(training\))p 1469 196 V 291 w(98.9\045)p 1914 196 V 229 w(79.3\045)p 2359 196 V 432 w(89.2\045)p 3007 196 V 127 w(84.6\045)p 3350 196 V 407 296 V 459 266 a(MD)m(A)g(\(using)g(100\045)f(training\)) p 1469 296 V 250 w(93.3\045)p 1914 296 V 229 w(85.1\045)p 2359 296 V 432 w(89.2\045)p 3007 296 V 127 w(81.1\045)p 3350 296 V 407 396 V 459 366 a(CBR)j(\(using)d(50\045)h(training\))p 1469 396 V 316 w(87.6\045)p 1914 396 V 229 w(81.6\045)p 2359 396 V 432 w(84.7\045)p 3007 396 V 127 w(86.3\045)p 3350 396 V 407 495 V 459 465 a(CBR)i(\(using)d(100\045)h(training\))p 1469 495 V 274 w(88.8\045)p 1914 495 V 229 w(80.6\045)p 2359 495 V 432 w(84.7\045)p 3007 495 V 127 w(88.0\045)p 3350 495 V 407 595 V 459 565 a(FFNN)h(\(using)f(100\045)f(training\))p 1469 595 V 228 w(98.9\045)p 1914 595 V 229 w(79.3\045)p 2359 595 V 432 w(89.2\045)p 3007 595 V 127 w(81.1\045)p 3350 595 V 409 598 2942 4 v 1162 746 a(T)-7 b(able)20 b(6:)25 b(Results)c(for)f(discriminating)e(models)-9 1096 y(models)k(were)g(better)g(here)h(it)g(w)o(ould)f(seem)h(lik)o (ely)f(that)h(their)-9 1196 y(performance)c(w)o(ould)i(suf)n(fer)h (more)f(from)g(losing)h(data)g(when)-9 1296 y(compared)g(to)j(models)f (using)h(actual)f(classi\002cation)h(bound-)-9 1395 y(aries.)-9 1668 y Fe(Refer)n(ences)-9 1853 y Fd([1])40 b(A.)i(Gray)-5 b(,)46 b(P)-9 b(.)42 b(Sallis,)49 b(and)41 b(S.)h(MacDonell.)98 b(Identi-)129 1953 y(\002ed)20 b(\(inte)o(grated)e(dictionary-based)g (e)o(xtraction)g(of)i(non-)129 2053 y(language-dependent)15 b(tok)o(en)20 b(information)e(for)i(forensic)129 2152 y(identi\002cation,)g(e)o(xamination,)f(and)h(discrimination\):)25 b(A)129 2252 y(dictionary-based)18 b(system)j(for)f(e)o(xtracting)f (source)i(code)129 2352 y(metrics)28 b(for)g(softw)o(are)g(forensics.) 54 b(In)28 b Fb(Pr)l(oceedings)g(of)129 2451 y(SE:E&P'98)13 b(\(Softwar)m(e)i(Engineering:)20 b(Education)14 b(and)129 2551 y(Pr)o(actice)22 b(Confer)m(ence\))p Fd(,)f(pages)h(252\226259.)d (IEEE)j(Com-)129 2650 y(puter)d(Society)h(Press,)h(1998.)-9 2800 y([2])40 b(I.)27 b(Krsul)f(and)g(E.)h(H.)g(Spaf)n(ford.)48 b(Authorship)25 b(analysis:)129 2900 y(Identifying)20 b(the)i(author)f(of)h(a)h(program.)33 b Fb(Computer)o(s)23 b(&)129 2999 y(Security)p Fd(,)c(16\(3\):233\226256,)c(1997.)-9 3149 y([3])40 b(P)-9 b(.)38 b(Sallis,)k(A.)c(Aakjaer)m(,)i(and)d(S.)h (MacDonell.)83 b(Soft-)129 3248 y(w)o(are)35 b(forensics:)54 b(Old)35 b(methods)f(for)g(a)h(ne)n(w)g(science.)2078 1096 y(In)19 b Fb(Pr)l(oceedings)f(of)i(SE:E&P'96)d(\(Softwar)m(e)i (Engineer)n(-)2078 1196 y(ing:)74 b(Education)43 b(and)h(Pr)o(actice\)) p Fd(,)50 b(pages)45 b(367\226371.)2078 1296 y(IEEE)19 b(Computer)g(Society)h(Press,)h(1996.)1939 1483 y([4])41 b(P)-9 b(.)28 b(Sallis,)j(S.)e(MacDonell,)g(G.)g(MacLennan,)f(A.)h (Gray)-5 b(,)2078 1582 y(and)33 b(R.)i(Kilgour)-5 b(.)72 b(Identi\002ed:)52 b(Softw)o(are)34 b(authorship)2078 1682 y(analysis)f(with)g(case-based)f(reasoning.)68 b(In)33 b Fb(Pr)l(oceed-)2078 1781 y(ings)22 b(of)g(the)f(Addendum)f(Session)i (of)g(the)g(1997)e(Interna-)2078 1881 y(tional)j(Confer)m(ence)g(on)h (Neur)o(al)g(Information)e(Pr)l(ocess-)2078 1981 y(ing)e(and)g (Intellig)o(ent)f(Information)g(Systems)p Fd(,)i(pages)g(53\226)2078 2080 y(56,)e(1998.)1939 2267 y([5])41 b(M.)17 b(Shepperd)d(and)j(C.)g (Scho\002eld.)k(Estimating)16 b(softw)o(are)2078 2367 y(project)25 b(ef)n(fort)g(using)h(analogies.)48 b Fb(IEEE)26 b(T)-5 b(r)o(ansactions)2078 2467 y(on)19 b(Softwar)m(e)h(Engineering)p Fd(,)e(23\(11\):736\226743,)d(1997.)1939 2654 y([6])41 b(E.)24 b(H.)g(Spaf)n(ford)e(and)i(S.)g(A.)h(W)-7 b(eeber)i(.)42 b(Softw)o(are)23 b(foren-)2078 2753 y(sics:)29 b(Can)22 b(we)g(track)f(code)g(to)g(its)i(authors?)41 b Fb(Computer)o(s)2078 2853 y(&)20 b(Security)p Fd(,)g(12:585\226595,)c(1993.)1939 3040 y([7])41 b(G.)23 b(Whale.)39 b(Softw)o(are)22 b(metrics)h(and)g (plagiarism)f(detec-)2078 3139 y(tion.)d Fb(J)n(ournal)c(of)h(Systems)g (and)f(Softwar)m(e)p Fd(,)i(13:131\226138,)2078 3239 y(1990.)p eop %%Trailer end userdict /end-hook known{end-hook}if %%EOF