Newer
Older
Discussion_Papers / Papers / 1999 / 99-12 / Iconip99for.ps
%!PS-Adobe-2.0
%%Creator: dvips 5.83 (MiKTeX 1.20) Copyright 1998 Radical Eye Software
%%Title: forensics.dvi
%%CreationDate: Fri Apr 16 11:20:19 1999
%%Pages: 6
%%PageOrder: Ascend
%%BoundingBox: 0 0 596 842
%%DocumentFonts: Times-Roman Times-Bold Times-Italic
%%EndComments
%DVIPSWebPage: (www.radicaleye.com)
%DVIPSCommandLine: dvips forensics.dvi -o forensics.ps
%DVIPSParameters: dpi=600, compressed
%DVIPSSource:  TeX output 1999.04.16:1120
%%BeginProcSet: texc.pro
%!
% texc.pro -- dvips run-time prologue. Creates TeXDict (300 dict) and
% defines everything below inside it; the trailing "end" closes it again.
%
% Abbreviations used throughout:
%   N = def     B = bind def     S = exch     X = exch def
%   A = dup     TR = translate   p = show     a = moveto
%
% Page geometry
%   vsize, hsize  defaults 11 72 mul and 8.5 72 mul; overwritten by @start.
%   @rigin        if isls is true, concatenates a landscape matrix chosen by
%                 landplus90; scales by 72/Resolution and -72/VResolution;
%                 translates; then rewrites the CTM with every entry that is
%                 within 0.00001 of an integer rounded, and both translation
%                 entries rounded unconditionally.
%   @landscape    sets isls true.   @manualfeed sets statusdict /manualfeed.
%   @copies       stores its operand as #copies.
%
% Bitmap (Type 3) fonts
%   df, dfs       start a font: df uses FMat and sf = 1; dfs divides its two
%                 top operands to get sf and builds [sf 0 0 -sf 0 0]. Both
%                 finish in df-tail, which builds an 8-entry font dict nn
%                 (FontType 3, BuildChar = CharBuilder, Encoding = IEn),
%                 allocates the base string and BitMaps array from the two
%                 remaining operands, and leaves a mark on the stack.
%   D             "descriptor code D": records base[code] = ctr and
%                 BitMaps[ctr] = descriptor, then increments ctr.
%   I             same as D using the previous code + 1.
%   E             pops the mark left by df-tail, then definefont + setfont.
%   Cw Ch Cx Cy Cdx  read the last five bytes of the current descriptor Cd:
%                 Cw = Cd[len-5], Ch = Cd[len-4], Cx = 128 - Cd[len-3],
%                 Cy = Cd[len-2] - 127, Cdx = Cd[len-1].
%   CharBuilder   BuildChar body: looks up Cd via base/BitMaps, calls
%                 setcachedevice, then imagemask with a data procedure that
%                 returns the row buffer rw, either repeated (rc > 0) or
%                 freshly produced by G. Wrapped in save/restore.
%   G             row decoder: reads bytes from id at gp; for each byte,
%                 (byte mod 18) is the argument and (byte idiv 18) indexes
%                 the operator table pl. Entries of pl, by index:
%                   0,1   advance cp by arg, copy 1 byte from id     (chg)
%                   2,3   copy arg+1 bytes from id
%                   4,5   advance cp by arg, then lsh on the row byte
%                   6,7   advance cp by arg, then rsh on the row byte
%                   8     skip arg+1 row bytes unchanged
%                   9     set repeat count rc = arg and end the row
%                   10    fill arg+1 bytes with 255                  (set)
%                   11    fill arg+1 bytes with 0                    (clr)
%                   12,13 advance cp by arg, copy 2 bytes from id
%                   14    end the row
%                 Odd entries 1..7 and entry 13 are the even entry followed
%                 by nd, which resets cp, pushes rw and exits the loop.
%
% Page bracket and job start
%   bop           runs userdict /bop-hook if present, saves VM state in SI,
%                 calls @rigin, moves to 0 0, binds V to QV or RV depending
%                 on the CTM (m0^2 + m1^2 < .99 selects QV), then pops the
%                 two operands the caller pushed before bop.
%   eop           restores SI, runs /eop-hook if present, showpage.
%   @start        runs /start-hook if present, pops one operand, then stores
%                 VResolution, Resolution, DVImag (operand / 1000), fills IEn
%                 with 256 names (code + 360 written in radix 36), and sets
%                 vsize and hsize to the next two operands / 65781.76.
%
% Rules -- NOTE(review): presumably TeX rules; confirm against dvips docs
%   v             stores Ry and Rx, then calls V.
%   RV            imagemask-based variant; one of two bodies is chosen once,
%                 here, by prefix-matching statusdict /product against
%                 (Display), (NeXT) and (LaserWriter 16/600).
%   QV            fills an Rx-by-Ry rectangle whose start point is rounded
%                 in device space.
%
% Text positioning shortcuts
%   tail          remembers its operand in delta and does "0 rmoveto".
%   M             n str M: show str, move by delta + n (delta updated).
%   b             n str b: show str, move by n (delta updated).
%   c d e f g h i j k   str X: M with n = -4 -3 -2 -1 0 1 2 3 4.
%   w             n w: "n 0 rmoveto".
%   l m n o q r s t     str X: show, then w with -4 -3 -2 -1 1 2 3 4.
%   x             n x: "0 n rmoveto".   y: str x y "y": show str, moveto.
%   bos, eos      save into SS / restore SS.
/TeXDict 300 dict def TeXDict begin/N{def}def/B{bind def}N/S{exch}N/X{S
N}B/A{dup}B/TR{translate}N/isls false N/vsize 11 72 mul N/hsize 8.5 72
mul N/landplus90{false}def/@rigin{isls{[0 landplus90{1 -1}{-1 1}ifelse 0
0 0]concat}if 72 Resolution div 72 VResolution div neg scale isls{
landplus90{VResolution 72 div vsize mul 0 exch}{Resolution -72 div hsize
mul 0}ifelse TR}if Resolution VResolution vsize -72 div 1 add mul TR[
matrix currentmatrix{A A round sub abs 0.00001 lt{round}if}forall round
exch round exch]setmatrix}N/@landscape{/isls true N}B/@manualfeed{
statusdict/manualfeed true put}B/@copies{/#copies X}B/FMat[1 0 0 -1 0 0]
N/FBB[0 0 0 0]N/nn 0 N/IEn 0 N/ctr 0 N/df-tail{/nn 8 dict N nn begin
/FontType 3 N/FontMatrix fntrx N/FontBBox FBB N string/base X array
/BitMaps X/BuildChar{CharBuilder}N/Encoding IEn N end A{/foo setfont}2
array copy cvx N load 0 nn put/ctr 0 N[}B/sf 0 N/df{/sf 1 N/fntrx FMat N
df-tail}B/dfs{div/sf X/fntrx[sf 0 0 sf neg 0 0]N df-tail}B/E{pop nn A
definefont setfont}B/Cw{Cd A length 5 sub get}B/Ch{Cd A length 4 sub get
}B/Cx{128 Cd A length 3 sub get sub}B/Cy{Cd A length 2 sub get 127 sub}
B/Cdx{Cd A length 1 sub get}B/Ci{Cd A type/stringtype ne{ctr get/ctr ctr
1 add N}if}B/id 0 N/rw 0 N/rc 0 N/gp 0 N/cp 0 N/G 0 N/CharBuilder{save 3
1 roll S A/base get 2 index get S/BitMaps get S get/Cd X pop/ctr 0 N Cdx
0 Cx Cy Ch sub Cx Cw add Cy setcachedevice Cw Ch true[1 0 0 -1 -.1 Cx
sub Cy .1 sub]/id Ci N/rw Cw 7 add 8 idiv string N/rc 0 N/gp 0 N/cp 0 N{
rc 0 ne{rc 1 sub/rc X rw}{G}ifelse}imagemask restore}B/G{{id gp get/gp
gp 1 add N A 18 mod S 18 idiv pl S get exec}loop}B/adv{cp add/cp X}B
/chg{rw cp id gp 4 index getinterval putinterval A gp add/gp X adv}B/nd{
/cp 0 N rw exit}B/lsh{rw cp 2 copy get A 0 eq{pop 1}{A 255 eq{pop 254}{
A A add 255 and S 1 and or}ifelse}ifelse put 1 adv}B/rsh{rw cp 2 copy
get A 0 eq{pop 128}{A 255 eq{pop 127}{A 2 idiv S 128 and or}ifelse}
ifelse put 1 adv}B/clr{rw cp 2 index string putinterval adv}B/set{rw cp
fillstr 0 4 index getinterval putinterval adv}B/fillstr 18 string 0 1 17
{2 copy 255 put pop}for N/pl[{adv 1 chg}{adv 1 chg nd}{1 add chg}{1 add
chg nd}{adv lsh}{adv lsh nd}{adv rsh}{adv rsh nd}{1 add adv}{/rc X nd}{
1 add set}{1 add clr}{adv 2 chg}{adv 2 chg nd}{pop nd}]A{bind pop}
forall N/D{/cc X A type/stringtype ne{]}if nn/base get cc ctr put nn
/BitMaps get S ctr S sf 1 ne{A A length 1 sub A 2 index S get sf div put
}if put/ctr ctr 1 add N}B/I{cc 1 add D}B/bop{userdict/bop-hook known{
bop-hook}if/SI save N @rigin 0 0 moveto/V matrix currentmatrix A 1 get A
mul exch 0 get A mul add .99 lt{/QV}{/RV}ifelse load def pop pop}N/eop{
SI restore userdict/eop-hook known{eop-hook}if showpage}N/@start{
userdict/start-hook known{start-hook}if pop/VResolution X/Resolution X
1000 div/DVImag X/IEn 256 array N 2 string 0 1 255{IEn S A 360 add 36 4
index cvrs cvn put}for pop 65781.76 div/vsize X 65781.76 div/hsize X}N
/p{show}N/RMat[1 0 0 -1 0 0]N/BDot 260 string N/Rx 0 N/Ry 0 N/V{}B/RV/v{
/Ry X/Rx X V}B statusdict begin/product where{pop false[(Display)(NeXT)
(LaserWriter 16/600)]{A length product length le{A length product exch 0
exch getinterval eq{pop true exit}if}{pop}ifelse}forall}{false}ifelse
end{{gsave TR -.1 .1 TR 1 1 scale Rx Ry false RMat{BDot}imagemask
grestore}}{{gsave TR -.1 .1 TR Rx Ry scale 1 1 false RMat{BDot}
imagemask grestore}}ifelse B/QV{gsave newpath transform round exch round
exch itransform moveto Rx 0 rlineto 0 Ry neg rlineto Rx neg 0 rlineto
fill grestore}B/a{moveto}B/delta 0 N/tail{A/delta X 0 rmoveto}B/M{S p
delta add tail}B/b{S p tail}B/c{-4 M}B/d{-3 M}B/e{-2 M}B/f{-1 M}B/g{0 M}
B/h{1 M}B/i{2 M}B/j{3 M}B/k{4 M}B/w{0 rmoveto}B/l{p -4 w}B/m{p -3 w}B/n{
p -2 w}B/o{p -1 w}B/q{p 1 w}B/r{p 2 w}B/s{p 3 w}B/t{p 4 w}B/x{0 S
rmoveto}B/y{3 2 roll p a}B/bos{/SS save N}B/eos{SS restore}B end

%%EndProcSet
%%BeginProcSet: 8r.enc
% @@psencodingfile@{
%   author = "S. Rahtz, P. MacKay, Alan Jeffrey, B. Horn, K. Berry",
%   version = "0.6",
%   date = "1 July 1998",
%   filename = "8r.enc",
%   email = "tex-fonts@@tug.org",
%   docstring = "Encoding for TrueType or Type 1 fonts
%                to be used with TeX."
% @}
% 
% Idea is to have all the characters normally included in Type 1 fonts
% available for typesetting. This is effectively the characters in Adobe
% Standard Encoding + ISO Latin 1 + extra characters from Lucida.
% 
% Character code assignments were made as follows:
% 
% (1) the Windows ANSI characters are almost all in their Windows ANSI
% positions, because some Windows users cannot easily reencode the
% fonts, and it makes no difference on other systems. The only Windows
% ANSI characters not available are those that make no sense for
% typesetting -- rubout (127 decimal), nobreakspace (160), softhyphen
% (173). quotesingle and grave are moved just because it's such an
% irritation not having them in TeX positions.
% 
% (2) Remaining characters are assigned arbitrarily to the lower part
% of the range, avoiding 0, 10 and 13 in case we meet dumb software.
% 
% (3) Y&Y Lucida Bright includes some extra text characters; in the
% hopes that other PostScript fonts, perhaps created for public
% consumption, will include them, they are included starting at 0x12.
% 
% (4) Remaining positions left undefined are for use in (hopefully)
% upward-compatible revisions, if someday more characters are generally
% available.
% 
% (5) hyphen appears twice for compatibility with both 
% ASCII and Windows.
% 
% TeXBase1Encoding: 256-entry array of glyph names; the array index is the
% character code. Each "% 0xN0" marker gives the code of the first name
% that follows it (16 names per marker).
/TeXBase1Encoding [
% 0x00 (encoded characters from Adobe Standard not in Windows 3.1)
  /.notdef /dotaccent /fi /fl
  /fraction /hungarumlaut /Lslash /lslash
  /ogonek /ring /.notdef
  /breve /minus /.notdef 
% These are the only two remaining unencoded characters, so may as
% well include them.
  /Zcaron /zcaron 
% 0x10
 /caron /dotlessi 
% (unusual TeX characters available in, e.g., Lucida Bright)
 /dotlessj /ff /ffi /ffl 
 /.notdef /.notdef /.notdef /.notdef
 /.notdef /.notdef /.notdef /.notdef
 % very contentious; it's so painful not having quoteleft and quoteright
 % at 96 and 39 that we move the things normally found there to here.
 /grave /quotesingle 
% 0x20 (ASCII begins)
 /space /exclam /quotedbl /numbersign
 /dollar /percent /ampersand /quoteright
 /parenleft /parenright /asterisk /plus /comma /hyphen /period /slash
% 0x30
 /zero /one /two /three /four /five /six /seven
 /eight /nine /colon /semicolon /less /equal /greater /question
% 0x40
 /at /A /B /C /D /E /F /G /H /I /J /K /L /M /N /O
% 0x50
 /P /Q /R /S /T /U /V /W
 /X /Y /Z /bracketleft /backslash /bracketright /asciicircum /underscore
% 0x60
 /quoteleft /a /b /c /d /e /f /g /h /i /j /k /l /m /n /o
% 0x70
 /p /q /r /s /t /u /v /w
 /x /y /z /braceleft /bar /braceright /asciitilde
 /.notdef % rubout; ASCII ends
% 0x80
 /.notdef /.notdef /quotesinglbase /florin
 /quotedblbase /ellipsis /dagger /daggerdbl
 /circumflex /perthousand /Scaron /guilsinglleft
 /OE /.notdef /.notdef /.notdef
% 0x90
 /.notdef /.notdef /.notdef /quotedblleft
 /quotedblright /bullet /endash /emdash
 /tilde /trademark /scaron /guilsinglright
 /oe /.notdef /.notdef /Ydieresis
% 0xA0
 /.notdef % nobreakspace
 /exclamdown /cent /sterling
 /currency /yen /brokenbar /section
 /dieresis /copyright /ordfeminine /guillemotleft
 /logicalnot
 /hyphen % Y&Y (also at 45); Windows' softhyphen
 /registered
 /macron
% 0xB0
 /degree /plusminus /twosuperior /threesuperior
 /acute /mu /paragraph /periodcentered
 /cedilla /onesuperior /ordmasculine /guillemotright
 /onequarter /onehalf /threequarters /questiondown
% 0xC0
 /Agrave /Aacute /Acircumflex /Atilde /Adieresis /Aring /AE /Ccedilla
 /Egrave /Eacute /Ecircumflex /Edieresis
 /Igrave /Iacute /Icircumflex /Idieresis
% 0xD0
 /Eth /Ntilde /Ograve /Oacute
 /Ocircumflex /Otilde /Odieresis /multiply
 /Oslash /Ugrave /Uacute /Ucircumflex
 /Udieresis /Yacute /Thorn /germandbls
% 0xE0
 /agrave /aacute /acircumflex /atilde
 /adieresis /aring /ae /ccedilla
 /egrave /eacute /ecircumflex /edieresis
 /igrave /iacute /icircumflex /idieresis
% 0xF0
 /eth /ntilde /ograve /oacute
 /ocircumflex /otilde /odieresis /divide
 /oslash /ugrave /uacute /ucircumflex
 /udieresis /yacute /thorn /ydieresis
] def

%%EndProcSet
%%BeginProcSet: texps.pro
%!
% texps.pro -- support for scaled/re-encoded outline fonts. Adds the
% following to TeXDict (S and B are the exch / bind-def shorthands that
% texc.pro defines in the same dictionary):
%
%   rf            /Fname <width list> {proc} count size /BaseFont  rf
%                 1. findfont, then copies every entry except FID and
%                    UniqueID into a new dictionary, which is left open.
%                 2. Executes {proc} inside that dictionary while building
%                    the matrix [size 0 0 -size*VResolution/Resolution 0 0];
%                    the proc may re-encode (ReEncodeFont) or alter the
%                    matrix entries (SlantFont, ExtendFont).
%                 3. Walks Encoding from code 0 upward, consuming the width
%                    list from the top of the operand stack: a mark with an
%                    integer beneath it skips that many codes; a bare
%                    integer is divided by FontMatrix[0] and by the size and
%                    stored under the current glyph name in a new
%                    count-entry dictionary, which becomes /Metrics.
%                 4. definefont under Fname, makefont with the matrix, and
%                    defines Fname as a procedure that setfonts the result.
%   ObliqueSlant  angle -> -(sin angle / cos angle).
%   SlantFont     "4 index mul add"; intended for use inside the rf proc.
%   ExtendFont    "3 -1 roll mul exch"; intended for use inside the rf proc.
%   ReEncodeFont  array ReEncodeFont: if CharStrings is readable, builds a
%                 copy of the array in which every name not known in
%                 CharStrings is replaced by /.notdef (the copy is used only
%                 when at least one name was replaced); the resulting array
%                 is then defined as /Encoding in the current dictionary.
TeXDict begin/rf{findfont dup length 1 add dict begin{1 index/FID ne 2
index/UniqueID ne and{def}{pop pop}ifelse}forall[1 index 0 6 -1 roll
exec 0 exch 5 -1 roll VResolution Resolution div mul neg 0 0]/Metrics
exch def dict begin Encoding{exch dup type/integertype ne{pop pop 1 sub
dup 0 le{pop}{[}ifelse}{FontMatrix 0 get div Metrics 0 get div def}
ifelse}forall Metrics/Metrics currentdict end def[2 index currentdict
end definefont 3 -1 roll makefont/setfont cvx]cvx def}def/ObliqueSlant{
dup sin S cos div neg}B/SlantFont{4 index mul add}def/ExtendFont{3 -1
roll mul exch}def/ReEncodeFont{CharStrings rcheck{/Encoding false def
dup[exch{dup CharStrings exch known not{pop/.notdef/Encoding true def}
if}forall Encoding{]exch pop}{cleartomark}ifelse}if/Encoding exch def}
def end

%%EndProcSet
% Job start-up. TeXDict is opened here and stays open until the "end" that
% follows the last font definition. @start consumes its operands from the
% top of the stack downwards:
%   (...forensics.dvi)  the string operand; @start simply pops it
%   600  600            stored as VResolution, then Resolution
%   1000                divided by 1000 and stored as DVImag (1 here)
%   55380996            divided by 65781.76 and stored as vsize (~841.9)
%   39158280            divided by 65781.76 and stored as hsize (~595.3)
% NOTE(review): the resulting hsize x vsize matches the 596 x 842
% BoundingBox, so 65781.76 is presumably TeX scaled points per PostScript
% point -- confirm against the dvips documentation.
TeXDict begin 39158280 55380996 1000 600 600
(d:\Users\argray\work\publications\Papers\1999\Forensics (ICONIP)/forensics.dvi)
@start
%DVIPSBitmapFont: Fa cmsy10 10 1
% Bitmap font Fa with a single glyph, built with the df / D / E procedures
% from texc.pro:
%   /Fa 1 121 df   start the font; df-tail allocates a 1-element BitMaps
%                  array and a 121-byte base string (code -> bitmap index).
%   <...> 120 D    register the hex string as the descriptor for character
%                  code 120. Its last five bytes (18 4C 7A BA 25) are the
%                  trailer read by Cw, Ch, Cx, Cy and Cdx, giving Cw = 24,
%                  Ch = 76, Cx = 6, Cy = 59, Cdx = 37; the bytes before the
%                  trailer are the packed rows decoded by G.
%   E              definefont + setfont.
/Fa 1 121 df<137E3801FFC03807C1E0380F0070001E1338003E131C48130C141E147E
5AA3143C1400A3127CA37E121E7E6C7E6C7EEA00F013FCEA03FF380F8780381F01E0003E
13F0EB00F848137CA200FC133E5A141FA6127C143F6C133EA26C137CEA0F80000713F838
01E1F03800FFC0EB3F00130FEB03C0EB01E0EB00F01478147C143EA3141FA3123C127EA3
143E127812300038137C6C13786C13F0380783E03803FF8038007E00184C7ABA25>120
D E
%EndDVIPSBitmapFont
% Scaled, re-encoded outline fonts Fb..Fg, each defined with
%     /Fname <width list> {TeXBase1Encoding ReEncodeFont} count size /Base rf
% (rf is defined in texps.pro). After rf runs, executing Fname selects the
% font.
%
% Width-list syntax, as consumed by rf:
%   n[      a run of n consecutive character codes with no width entry
%   w       (bare integer) the width for the next character code
% rf reads the list from the top of the operand stack while walking the
% encoding from code 0 upward, so the list is written from code 255 down to
% code 0 (e.g. for Fb the leading "134[" covers codes 122-255 and the
% trailing "38[" covers codes 0-37).
% "count" sizes the Metrics dictionary; "size" is the scale passed to
% makefont. The final "end" closes the TeXDict opened before @start.
/Fb 134[37 1[55 1[42 23 32 32 1[42 42 42 60 23 2[23 42
42 23 37 42 37 1[42 12[46 42 2[51 1[55 3[37 28 3[51 1[55
1[51 6[28 42 42 42 42 4[42 3[28 3[28 28 28 65 38[{
TeXBase1Encoding ReEncodeFont}39 83.022 /Times-Italic
rf /Fc 134[50 1[72 1[55 33 39 44 1[55 50 55 83 28 55
1[28 1[50 1[44 55 44 55 50 18[72 94 8[72 72 14[50 50
50 50 2[25 33 45[{TeXBase1Encoding ReEncodeFont}29 99.6264
/Times-Bold rf /Fd 105[42 1[37 37 24[37 42 42 60 42 42
23 32 28 42 42 42 42 65 23 42 23 23 42 42 28 37 42 37
42 37 3[28 1[28 1[60 1[78 60 60 51 46 55 60 46 60 60
74 51 60 1[28 60 60 46 51 60 55 55 60 1[37 3[23 23 42
42 42 42 42 42 42 42 42 42 23 21 28 21 47 1[28 28 28
1[69 1[42 31[46 46 2[{TeXBase1Encoding ReEncodeFont}79
83.022 /Times-Roman rf /Fe 138[66 40 47 53 66 66 60 66
100 33 2[33 66 1[40 53 66 53 66 60 12[80 1[86 8[47 5[86
1[86 11[60 60 60 60 60 3[40 45[{TeXBase1Encoding ReEncodeFont}29
119.552 /Times-Bold rf /Ff 133[44 50 50 72 50 50 28 39
33 1[50 50 50 78 28 2[28 50 50 33 44 50 44 1[44 6[61
4[72 1[55 66 1[55 72 72 89 61 2[33 1[72 2[72 1[66 72
92 6[50 50 50 50 50 50 50 1[50 2[25 1[25 56 1[33 33 40[{
TeXBase1Encoding ReEncodeFont}50 99.6264 /Times-Roman
rf /Fg 134[72 1[104 1[72 40 56 48 1[72 72 72 112 40 72
1[40 72 72 48 64 72 64 72 64 13[80 96 1[80 1[104 128
6[80 1[104 96 96 104 19[48 36 44[{TeXBase1Encoding ReEncodeFont}33
143.462 /Times-Roman rf end
%%EndProlog
%%BeginSetup
%%Feature: *Resolution 600dpi
TeXDict begin
%%PaperSize: A4

%%EndSetup
%%Page: 1 1
1 0 bop 9 422 a Fg(Softw)o(are)34 b(F)n(orensics)f(for)j
(Discriminating)c(between)i(Program)g(Authors)98 605
y(using)h(Case-Based)e(Reasoning,)g(Feed-F)n(orw)o(ard)f(Neural)i(Netw)
o(orks)850 788 y(and)h(Multiple)f(Discriminant)f(Analysis)331
1028 y Ff(Stephen)25 b(G.)g(MacDonell,)f(Andre)n(w)g(R.)h(Gray)-6
b(,)25 b(Grant)g(MacLennan,)f(and)h(Philip)f(Sallis)1173
1145 y(Department)h(of)g(Information)e(Science)1484 1261
y(Uni)n(v)o(ersity)f(of)j(Otago)1172 1377 y(PO)g(Box)g(56,)f(Dunedin,)g
(Ne)n(w)h(Zealand)988 1493 y(+64)f(3)h(4798135)f(\(phone\))g(+64)h(3)g
(4798311)e(\(f)o(ax\))1185 1609 y(ste)n(v)o
(emac@infoscience.otago.ac.nz)-9 1935 y Fe(Abstract)-9
2121 y Fd(Softw)o(are)37 b(forensics)g(is)i(a)f(research)f(\002eld)h
(that,)43 b(by)37 b(treat-)-9 2220 y(ing)20 b(pieces)h(of)f(program)f
(source)h(code)g(as)h(linguistically)f(and)-9 2320 y(stylistically)25
b(analyzable)g(entities,)h(attempts)g(to)g(in)m(v)o(estigate)-9
2420 y(aspects)h(of)g(computer)f(program)f(authorship.)44
b(This)28 b(can)f(be)-9 2519 y(performed)g(with)j(the)g(goal)g(of)g
(identi\002cation,)h(discrimina-)-9 2619 y(tion,)h(or)e
(characterization)f(of)h(authors.)55 b(In)30 b(this)h(paper)e(we)-9
2718 y(e)o(xtract)34 b(a)i(set)g(of)f(26)g(standard)f(authorship)g
(metrics)h(from)-9 2818 y(351)23 b(programs)g(by)h(7)h(dif)n(ferent)e
(authors.)36 b(The)25 b(use)f(of)h(feed-)-9 2918 y(forw)o(ard)33
b(neural)g(netw)o(orks,)k(multiple)d(discriminant)f(anal-)-9
3017 y(ysis,)g(and)d(case-based)f(reasoning)g(is)j(then)d(in)m(v)o
(estigated)g(in)-9 3117 y(terms)21 b(of)g(classi\002cation)h(accurac)o
(y)e(for)h(the)g(authors)g(on)g(both)-9 3217 y(training)k(and)h
(testing)g(samples.)43 b(The)26 b(\002rst)i(tw)o(o)e(techniques)-9
3316 y(produce)31 b(remarkably)g(similar)j(results,)j(with)d(the)f
(best)h(re-)-9 3416 y(sults)j(coming)e(from)h(the)h(case-based)f
(reasoning)f(models.)-9 3515 y(All)16 b(techniques)f(ha)n(v)o(e)g(high)
h(prediction)e(accurac)o(y)g(rates,)j(sup-)-9 3615 y(porting)h(the)i
(feasibility)g(of)g(the)g(task)h(of)f(discriminating)e(pro-)-9
3715 y(gram)h(authors)g(based)h(on)g(source-code)d(measurements.)-9
3995 y Fe(1)119 b(Intr)n(oduction)-9 4181 y Fd(In)28
b(a)h(surprisingly)e(lar)o(ge)h(number)f(of)i(situations)f(there)h(is)g
(a)-9 4281 y(need)22 b(to)h(in)m(v)o(estigate)f(the)h(nature)f(of)h(a)g
(computer)f(program')-5 b(s)-9 4381 y(authorship.)24
b(By)c(this)i(it)f(is)g(meant,)f(that)h(there)f(is)h(some)f(ques-)-9
4480 y(tion)k(concerning)e(the)j(authorship)e(of)i(a)g(series)g(of)g
(programs)-9 4580 y(or)j(alternati)n(v)o(ely)g(the)h(characteristics)g
(of)g(program)e(authors)-9 4679 y([3)o(].)115 4779 y(The)d(most)g
(widely)g(kno)n(wn)f(e)o(xample)g(is)i(plagiarism)e(de-)-9
4879 y(tection)j(in)g(an)h(academic)e(setting)h(where)g(students')g
(assign-)-9 4978 y(ments)17 b(can)h(be)g(compared)e(to)i(see)g(if)g
(some)g(are)g(\223suspiciously)-9 5078 y(similar\224)k([7)o(].)33
b(The)22 b(incidence)g(of)g(highly)g(similar)h(programs)-9
5178 y(can)17 b(pro)o(vide)e(suggesti)n(v)o(e)i(e)n(vidence)f(that)h
(one)g(student')-5 b(s)18 b(code)-9 5277 y(may)k(ha)n(v)o(e)h(been)g
(deri)n(v)o(ed)f(from)g(another')-5 b(s.)34 b(This)23
b(particular)-9 5377 y(area)k(of)h(research)e(pro)o(vided)g(the)i
(origins)e(of)i(the)g(ideas)g(that)-9 5477 y(no)n(w)i(mak)o(e)g(up)g
(the)h(\002eld)f(of)h(softw)o(are)f(forensics)g(\226)g(which)-9
5576 y(is)c(de\002ned)f(here)h(as)g(the)g(study)g(of)g(program)d
(characteristics)1939 1935 y(with)16 b(the)g(intention)f(of)g
(identifying,)g(e)o(xamining,)f(or)h(discrim-)1939 2035
y(inating)20 b(between)f(program)f(authors)h([1)o(].)2064
2152 y(Softw)o(are)34 b(forensics)h(also)g(includes)g(the)g(areas)g(of)
g(au-)1939 2252 y(thorship)f(characterization,)j(as)e(in)h
(psychological)c(studies)1939 2352 y(of)25 b(the)g(relationship)f
(between)g(programmer)e(attrib)n(utes)j(and)1939 2451
y(their)34 b(code)f(and)h(between)f(programming)e(conditions)h(and)1939
2551 y(code.)60 b(The)31 b(analysis)h(of)g(malicious)f(code)h(\(such)f
(as)h(com-)1939 2651 y(puter)24 b(viruses,)h(softw)o(are)f(trapdoors,)f
(and)h(trojan)f(horses\))h(is)1939 2750 y(another)31
b(application)f(area,)k(although)d(this)h(in)m(v)n(olv)o(es)e(more)1939
2850 y(subjecti)n(v)o(e)24 b(analysis)h([6)o(].)40 b(Other)25
b(applications)e(of)i(softw)o(are)1939 2949 y(forensics)f(include)f
(quality)g(control)g(\(through)e(coding)i(stan-)1939
3049 y(dards)f(for)g(e)o(xample,)g(c)o(yclomatic)f(comple)o(xity)g(or)h
(comment)1939 3149 y(density)30 b(which)g(can)g(be)g(used)g(as)h(an)f
(indicator)f(of)g(internal)1939 3248 y(documentation)d(quality\),)k
(author)d(tracking)h(\(for)f(e)o(xample,)1939 3348 y(determining)36
b(the)h(author)g(of)g(code)g(of)g(unkno)n(wn)e(origin\),)1939
3448 y(change)20 b(control)g(\(tracking)g(the)h(authorship)e(of)i
(changes)f(and)1939 3547 y(quality)25 b(control)f(when)h(making)f
(changes\),)h(and)f(o)n(wnership)1939 3647 y(disputes.)2064
3765 y(While)30 b(the)f(idea)g(of)h(dissenting)e(the)i(structure)e(and)
h(na-)1939 3864 y(ture)20 b(of)f(programs)f(to)h(discern)g(some)h
(information)d(about)i(the)1939 3964 y(lik)o(ely)g(author)e(or)i
(authors)e(and/or)h(their)g(characteristics)g(may)1939
4064 y(appear)27 b(some)n(what)f(esoteric,)j(perhaps)d(e)n(v)o(en)h
(unrealistic,)h(it)1939 4163 y(has)20 b(been)f(sho)n(wn)g(that)h(such)f
(acti)n(vities)h(are)g(feasible,)f(at)h(least)1939 4263
y(under)f(certain)g(circumstances)g([2)o(].)25 b(In)19
b(f)o(act)h(man)o(y)f(measure-)1939 4362 y(ments)27 b(can)g(be)g(dif)n
(\002cult)g(for)f(programmers)f(to)i(change)f([6)o(].)1939
4462 y(An)i(open)e(question)g(is)i(ho)n(w)f(such)g(models)g(should)f
(be)i(con-)1939 4562 y(structed)j(to)h(best)g(represent)f(the)h
(mappings)e(between)h(pro-)1939 4661 y(gram)19 b(features,)g(authors,)g
(and)g(the)h(authors')e(characteristics.)2064 4779 y(In)31
b(this)g(paper)f(the)h(focus)f(will)i(be)e(on)h(the)g(area)f(of)h(de-)
1939 4879 y(v)o(eloping)22 b(models)g(that)i(are)f(capable)f(of)h
(discriminating)f(be-)1939 4978 y(tween)36 b(se)n(v)o(eral)g(authors)f
(using)h(source-code)d(based)j(mea-)1939 5078 y(surements.)50
b(The)28 b(measurements)f(that)h(are)h(preferred)d(here)1939
5178 y(are)45 b(those)g(that)g(can)g(be)g(automatically)f(e)o(xtracted)
g(from)1939 5277 y(source)29 b(code)f(by)h(pattern)f(matching)g
(algorithms)f(since)j(the)1939 5377 y(v)n(olumes)19 b(of)h(data)g
(needed)e(for)h(these)h(applications)f(will)i(gen-)1939
5477 y(erally)i(surpass)h(con)m(v)o(enient)d(human)g(measurement.)33
b(Appli-)1939 5576 y(cations)d(for)f(such)h(authorship)e
(discrimination)g(procedures)p eop
%%Page: 2 2
2 1 bop -9 74 a Fd(include)28 b(plagiarism)g(detection,)j(o)n(wnership)
c(disputes,)k(and)-9 173 y(the)20 b(psychological)e(study)h(of)h
(programmers.)-9 516 y Fe(2)119 b(T)-11 b(echniques)63
b(f)m(or)e(authorship)h(dis-)170 666 y(crimination)-9
890 y Fc(2.1)99 b(Neural)25 b(netw)o(orks)-9 1067 y Fd(There)16
b(are)h(a)g(v)n(ast)h(number)d(of)i(neural)f(netw)o(ork)g
(architectures)-9 1167 y(and)22 b(training)g(algorithms)f(contained)g
(within)i(the)g(literature.)-9 1266 y(The)15 b(most)h(commonly)e(used)h
(architecture)g(for)g(most)h(applica-)-9 1366 y(tions)22
b(is)h(that)g(of)f(a)h(feed-forw)o(ard)c(neural)j(netw)o(ork)f
(\(FFNN\),)-9 1466 y(which)i(is)i(still)g(generally)e(trained)g(using)g
(some)h(form)f(of)h(the)-9 1565 y(back-propagation)15
b(algorithm.)115 1676 y(The)23 b(main)g(issues)h(when)e(using)h(this)h
(approach)c(concern)-9 1776 y(selecting)c(the)g(optimal)f(architecture)
g(for)h(the)g(netw)o(ork)f(and)h(in)-9 1876 y(stopping)h(the)h
(training)f(\(usually)h(by)g(using)g(data)g(set)h(splitting)-9
1975 y(and)f(stopping)f(training)g(when)h(a)h(v)n(alidation)e(data)i
(set)g(error)f(is)-9 2075 y(minimized\).)45 b(The)28
b(use)g(of)f(data)h(set)g(splitting)g(can)f(be)h(seen)-9
2175 y(as)19 b(a)f(disadv)n(antage,)f(since)h(this)h(reduces)f(the)g
(amount)f(of)h(data)-9 2274 y(a)n(v)n(ailable)h(for)h(the)g(netw)o(ork)
f(to)h(learn)g(the)g(relationships.)115 2385 y(More)26
b(sophisticated)f(approached)f(that)i(do)g(not)g(require)-9
2485 y(hold-out)f(samples)i(are)g(not)g(in)m(v)o(estigated)f(here)g(as)
i(the)o(y)f(are)-9 2584 y(lik)o(ely)35 b(to)h(be)g(less)h(accessible)f
(to)g(researchers)f(in)h(applied)-9 2684 y(\002elds.)-9
2985 y Fc(2.2)99 b(Discriminant)25 b(analysis)-9 3162
y Fd(Multiple)34 b(discriminant)f(analysis)h(\(MD)m(A\))g(is)i(a)f
(statistical)-9 3262 y(technique)21 b(that)j(separates)g(observ)n
(ations)d(into)j(tw)o(o)f(or)h(more)-9 3361 y(groups)e(based)i(on)g(se)
n(v)o(eral)g(orthogonal)d(linear)j(functions)f(of)-9
3461 y(the)34 b(independent)d(v)n(ariables.)65 b(The)34
b(technique)e(assumes)i(a)-9 3561 y(reasonable)17 b(de)o(gree)g(of)h
(multi)n(v)n(ariate)f(normality)-5 b(,)17 b(with)h(logis-)-9
3660 y(tic)25 b(re)o(gression)f(an)i(alternati)n(v)o(e)e(where)g(this)i
(is)g(not)f(the)h(case.)-9 3760 y(Ho)n(we)n(v)o(er)d(ordinary)f
(logistic)j(re)o(gression)e(is)j(more)e(suited)h(to)-9
3860 y(binary)15 b(outcomes,)h(and)g(will)h(not)g(be)f(discussed)h
(further)e(here.)115 3971 y(A)24 b(signi\002cant)f(adv)n(antage)f(of)h
(discriminant)f(analysis)i(as)-9 4070 y(a)k(technique)f(is)i(the)f
(easy)g(a)n(v)n(ailability)g(of)g(stepwise)g(proce-)-9
4170 y(dures)16 b(for)h(controlling)e(the)i(entry)f(and)h(remo)o(v)n
(al)e(of)i(v)n(ariables.)-9 4270 y(By)22 b(w)o(orking)e(with)i(only)f
(those)g(necessary)g(v)n(ariables)g(we)h(in-)-9 4369
y(crease)27 b(the)h(chance)e(of)i(the)g(model)e(being)h(able)g(to)h
(general-)-9 4469 y(ize)23 b(to)h(ne)n(w)f(sets)h(of)f(data.)35
b(In)23 b(addition,)f(the)i(data)f(collection)-9 4568
y(costs)17 b(can)h(be)f(reduced,)f(sometimes)h(signi\002cantly)-5
b(,)16 b(by)h(w)o(ork-)-9 4668 y(ing)i(with)i(a)f(smaller)h(set)g(of)e
(v)n(ariables.)115 4779 y(Another)28 b(adv)n(antage)g(of)h(the)h
(technique)e(is)j(that)f(it)g(pro-)-9 4879 y(vides)c(probability)e
(information)g(for)i(the)g(predictions,)h(both)-9 4978
y(in)i(terms)h(of)f(the)h(conditional)e(probability)g(of)h(an)g(observ)
n(a-)-9 5078 y(tion)23 b(belonging)f(to)i(a)h(particular)d(class)j(gi)n
(v)o(en)e(its)i(classi\002ca-)-9 5178 y(tion)18 b(and)g(the)g
(conditional)f(probability)f(that)j(a)f(particular)g(ob-)-9
5277 y(serv)n(ation)k(will)i(be)f(classi\002ed)h(as)g(belonging)d(to)i
(a)h(particular)-9 5377 y(class)i(gi)n(v)o(en)e(its)j(real)e(class.)42
b(In)25 b(a)h(le)o(gal)f(setting)h(such)f(infor)n(-)-9
5477 y(mation)19 b(w)o(ould)g(certainly)g(be)h(required)f(if)h(softw)o
(are)g(forensic)-9 5576 y(results)g(were)g(to)g(be)h(accepted)e(as)i(e)
n(vidence.)1939 74 y Fc(2.3)100 b(Case-based)25 b(r)n(easoning)1939
245 y Fd(Case-based)d(reasoning)e(\(CBR\))j(is)g(a)f(method)e(for)h
(modeling)1939 344 y(the)29 b(relationship)e(between)h(a)i(series)f(of)
f(independent)e(v)n(ari-)1939 444 y(ables)i(and)g(one)f(or)h(more)f
(dependent)f(v)n(ariables)h(by)g(storing)1939 543 y(the)21
b(cases)h(\(observ)n(ations\))c(in)j(a)h(database.)k(When)21
b(presented)1939 643 y(with)34 b(a)f(ne)n(w)g(observ)n(ation,)h(the)f
(cases)h(that)f(are)g(similar)g(in)1939 743 y(terms)20
b(of)e(the)i(independent)c(v)n(ariables)i(are)i(retrie)n(v)o(ed)d(and)i
(the)1939 842 y(dependent)26 b(v)n(ariables)i(calculated)f(from)g(them)
h(using)g(some)1939 942 y(form)20 b(of)f(\223a)n(v)o(eraging\224)f
(process.)2064 1050 y(CBR)k(has)e(the)g(adv)n(antages)e(of)i(not)g
(requiring)e(an)o(y)h(distri-)1939 1149 y(b)n(utional)c(assumptions)g
Fb(per)i(se)f Fd(b)n(ut)g(does)g(require)f(the)h(speci\002-)1939
1249 y(cation)f(of)h(a)g(distance)f(metric)g(\(for)g(\002nding)f(the)i
(closest)g(e)o(x)o(em-)1939 1348 y(plars)i(to)h(the)f(presented)f(case)
h(and)g(calculating)f(their)g(similar)n(-)1939 1448 y(ity\).)25
b(Scaling)20 b(\(if)g(an)o(y)f(is)i(used\))e(when)g(measuring)g
(similarity)1939 1548 y(can)24 b(be)h(based)f(on)f(ranges)h(or)g
(standardized)e(v)n(alues)i(if)h(some)1939 1647 y(distrib)n(utional)f
(assumptions)h(are)g(made.)39 b(The)25 b(other)g(aspect)1939
1747 y(that)d(requires)f(some)h(thought)e(is)j(the)f(selection)g(of)g
(a)g(method)1939 1847 y(for)g(combining)e(the)i(cases.)31
b(Again,)22 b(a)h(simple)f(weighted)f(a)n(v-)1939 1946
y(erage)16 b(approach)d(can)j(be)g(used)f(once)g(the)h(distance)g
(metric)f(has)1939 2046 y(been)22 b(decided)f(on,)i(with)f(perhaps)f
(some)i(po)n(wer)e(of)h(distance)1939 2145 y(used)16
b(to)g(increase)f(the)h(in\003uence)f(of)h(closer)f(observ)n(ations)f
(and)1939 2245 y(reduce)33 b(the)h(in\003uence)f(of)h(outliers.)66
b(In)33 b(most)h(implemen-)1939 2345 y(tations)27 b(a)g(threshold)e(of)
h(similarity)h(or)f(a)h(limit)g(of)f(\223related\224)1939
2444 y(cases)f(is)g(used)e(to)h(pre)n(v)o(ent)f(all)h(stored)f(cases)i
(in\003uencing)d(all)1939 2544 y(predictions.)2064 2652
y(One)16 b(particular)f(case-based)h(reasoning)e(system)j(that)f(has)
1939 2751 y(been)34 b(pre)n(viously)f(used)i(for)f(softw)o(are)g
(metric)h(research)f(is)1939 2851 y(the)g(ANGEL)f(system)g([5)o(].)65
b(ANGEL)33 b(has)h(also)f(been)g(im-)1939 2951 y(plemented)20
b(as)h(part)g(of)f(the)h(IDENTIFIED)f(system)h(that)g(w)o(as)1939
3050 y(used)j(in)g(this)h(paper)e(for)g(the)h(measurement)e(e)o
(xtraction,)h(and)1939 3150 y(CBR)32 b(and)e(FFNN)h(models)f([1)o(,)h
(4)o(].)55 b(The)30 b(ANGEL)g(system)1939 3249 y(also)22
b(allo)n(ws)f(for)g(the)g(automatic)f(selection)h(of)g(rele)n(v)n(ant)f
(v)n(ari-)1939 3349 y(ables)38 b(\(at)g(some)f(considerable)f
(computational)f(cost\),)41 b(al-)1939 3449 y(though)19
b(here)h(no)f(attempt)h(will)h(be)g(made)e(to)i(select)g(an)o(y)e
(opti-)1939 3548 y(mal)i(subset)f(of)g(v)n(ariables)f(when)h(using)g
(this)g(technique.)1939 3873 y Fe(3)120 b(A)-6 b(uthorship)31
b(data)f(set)1939 4074 y Fd(The)16 b(data)g(that)g(we)g(ha)n(v)o(e)f
(chosen)g(to)h(illustrate)g(the)g(author)f(dis-)1939
4173 y(crimination)26 b(problem)g(e)o(xhibits)h(man)o(y)f(of)h(the)h
(characteris-)1939 4273 y(tics)18 b(that)e(present)g(some)h(of)f(the)g
(most)h(perple)o(xing)d(dif)n(\002culties)1939 4373 y(found)23
b(when)i(undertaking)c(such)k(analyses.)38 b(The)25 b(data)g(con-)1939
4472 y(tains)32 b(programs)d(from)h(se)n(v)o(en)h(authors)f(with)h
(widely)g(v)n(ary-)1939 4572 y(ing)22 b(amounts)f(of)h(data)h(and)e
(from)g(three)h(basic)h(source)e(types.)1939 4671 y(26)f(measures)f
(were)h(e)o(xtracted)e(for)h(each)h(program)d(using)j(the)1939
4771 y(IDENTIFIED)f(tool)h(\(T)-7 b(able)20 b(1\).)2064
4879 y(All)31 b(programs)e(were)i(written)f(in)h(standard)f(C++.)57
b(The)1939 4978 y(source)52 b(code)h(for)f(authors)g(one,)60
b(tw)o(o,)h(and)52 b(three)h(are)1939 5078 y(from)31
b(programming)d(books;)37 b(authors)30 b(four)m(,)j(\002)n(v)o(e,)h
(and)d(six)1939 5178 y(are)c(e)o(xperienced)d(commercial)h
(programmers;)j(and)e(author)1939 5277 y(se)n(v)o(en')-5
b(s)28 b(code)g(is)h(from)f(e)o(xamples)f(pro)o(vided)f(with)j(a)f
(popu-)1939 5377 y(lar)23 b(C++)g(compiler)-5 b(.)31
b(The)23 b(choice)e(of)i(program)d(sources)i(may)1939
5477 y(appear)i(unusual,)g(b)n(ut)h(it)g(w)o(as)h(felt)f(that)g(the)f
(usual)h(source)f(of)1939 5576 y(student)c(programs)e(w)o(as)j(no)f
(more)f(realistic.)p eop
%%Page: 3 3
3 2 bop 554 -6 2653 4 v 552 93 4 100 v 604 64 a Fd(Measurement)p
1164 93 V 157 w(Description)p 3205 93 V 554 97 2653 4
v 552 196 4 100 v 604 167 a(WHITE)p 1164 196 V 343 w(Proportion)18
b(of)i(lines)h(that)f(are)g(blank)p 3205 196 V 552 296
V 604 266 a(SP)-8 b(A)m(CE-1)p 1164 296 V 294 w(Proportion)18
b(of)i(operators)f(with)h(whitespace)g(on)g(both)f(sides)p
3205 296 V 552 396 V 604 366 a(SP)-8 b(A)m(CE-2)p 1164
396 V 294 w(Proportion)18 b(of)i(operators)f(with)h(whitespace)g(on)g
(left)g(side)p 3205 396 V 552 495 V 604 465 a(SP)-8 b(A)m(CE-3)p
1164 495 V 294 w(Proportion)18 b(of)i(operators)f(with)h(whitespace)g
(on)g(right)f(side)p 3205 495 V 552 595 V 604 565 a(SP)-8
b(A)m(CE-4)p 1164 595 V 294 w(Proportion)18 b(of)i(operators)f(with)h
(whitespace)g(on)g(neither)f(side)p 3205 595 V 552 695
V 604 665 a(LOCCHARS)p 1164 695 V 169 w(Mean)h(number)f(of)g
(characters)h(per)f(line)p 3205 695 V 552 794 V 604 764
a(CAPS)p 1164 794 V 404 w(Proportion)f(of)i(letters)h(that)f(are)g
(upper)f(case)p 3205 794 V 552 894 V 604 864 a(LOC)p
1164 894 V 445 w(Non-whitespace)g(lines)h(of)g(code)p
3205 894 V 552 993 V 604 964 a(DB)o(UGSYM)p 1164 993
V 197 w(Deb)n(ug)g(v)n(ariables)f(per)h(line)g(of)g(code)g(\(LOC\))p
3205 993 V 552 1093 V 604 1063 a(DB)o(UGPRN)p 1164 1093
V 216 w(Commented)f(out)h(deb)n(ug)f(print)g(statements)i(per)e(LOC)p
3205 1093 V 552 1193 V 604 1163 a(COM)p 1164 1193 V 422
w(Proportion)f(of)i(LOC)h(that)f(are)g(purely)f(comment)p
3205 1193 V 552 1292 V 604 1262 a(INLCOM)p 1164 1292
V 283 w(Proportion)f(of)i(LOC)h(that)f(ha)n(v)o(e)g(inline)g(comments)p
3205 1292 V 552 1392 V 604 1362 a(ENDCOM)p 1164 1392
V 251 w(Proportion)e(of)i(end-of-block)d(braces)j(labelled)f(with)i
(comments)p 3205 1392 V 552 1492 V 604 1462 a(GO)m(T)o(O)p
1164 1492 V 384 w(Gotos)f(per)g(non-comment)d(LOC)k(\(NCLOC\))p
3205 1492 V 552 1591 V 604 1561 a(COND-1)p 1164 1591
V 306 w(Number)e(of)h(#if)g(per)g(NCLOC)p 3205 1591 V
552 1691 V 604 1661 a(COND-2)p 1164 1691 V 306 w(Number)f(of)h(#elif)g
(per)g(NCLOC)p 3205 1691 V 552 1790 V 604 1761 a(COND-3)p
1164 1790 V 306 w(Number)f(of)h(#ifdef)f(per)h(NCLOC)p
3205 1790 V 552 1890 V 604 1860 a(COND-4)p 1164 1890
V 306 w(Number)f(of)h(#ifndef)f(per)g(NCLOC)p 3205 1890
V 552 1990 V 604 1960 a(COND-5)p 1164 1990 V 306 w(Number)g(of)h(#else)
g(per)g(NCLOC)p 3205 1990 V 552 2089 V 604 2059 a(COND-6)p
1164 2089 V 306 w(Number)f(of)h(#endif)f(per)h(NCLOC)p
3205 2089 V 552 2189 V 604 2159 a(COND)p 1164 2189 V
376 w(Conditional)f(compilation)g(k)o(e)o(yw)o(ords)f(per)i(NCLOC)p
3205 2189 V 552 2289 V 604 2259 a(CCN)p 1164 2289 V 441
w(McCabe')-5 b(s)21 b(c)o(yclomatic)e(comple)o(xity)f(number)p
3205 2289 V 552 2388 V 604 2358 a(DEC-IF)p 1164 2388
V 343 w(if)j(statements)f(per)g(NCLOC)p 3205 2388 V 552
2488 V 604 2458 a(DEC-SWITCH)p 1164 2488 V 99 w(switch)h(statements)f
(per)g(NCLOC)p 3205 2488 V 552 2587 V 604 2558 a(DEC-WHILE)p
1164 2587 V 149 w(while)h(statements)f(per)g(NCLOC)p
3205 2587 V 552 2687 V 604 2657 a(DEC)p 1164 2687 V 445
w(Decision)g(statements)h(per)e(NCLOC)p 3205 2687 V 554
2690 2653 4 v 1371 2838 a(T)-7 b(able)21 b(1:)k(The)20
b(26)g(v)n(ariables)f(used)115 3163 y(F)o(or)g(the)g(purposes)f(of)h
(testing)g(the)h(v)n(arious)e(models)g(to)i(be)-9 3262
y(de)n(v)o(eloped)i(in)j Fa(x)p Fd(4.1,)g(4.2,)h(and)e(4.3,)i(the)f(a)n
(v)n(ailable)f(data)h(w)o(as)-9 3362 y(split)17 b(\(as)g(sho)n(wn)f(in)
h(T)-7 b(able)17 b(2\))f(with)h(strati\002cation)g(\(as)g(equally)-9
3462 y(as)38 b(possible\))f(across)h(authors.)76 b(The)37
b(split)h(w)o(as)h(approxi-)-9 3561 y(mately)23 b(25\045)h(in)g(the)g
(T)m(raining)f(1)h(set,)i(25\045)e(in)g(the)g(T)m(raining)-9
3661 y(2)c(set,)h(and)e(50\045)h(in)g(the)h(T)-6 b(esting)20
b(set.)115 3826 y(In)k(some)g(cases,)h(especially)e(for)h(authors)f(4)h
(and)f(5,)i(v)o(ery)-9 3925 y(little)32 b(data)g(is)g(a)n(v)n(ailable,)
i(b)n(ut)e(this)g(can)g(be)g(seen)f(as)i(a)f(use-)-9
4025 y(ful)27 b(test)i(of)f(a)h(situation)e(certain)h(to)g(arise)g(in)g
(practice.)48 b(The)-9 4125 y(only)30 b(concern)g(here)i(is)g(that)g
(the)g(prior)e(probabilities)h(from)-9 4224 y(the)21
b(T)m(raining)e(set)j(match)f(the)g(posterior)f(probabilities)g(in)h
(the)-9 4324 y(T)-6 b(esting)25 b(set.)41 b(In)25 b(a)g
(simulation-based)f(study)g(the)i(use)f(of)g(re-)-9 4424
y(sampling)34 b(w)o(ould)g(appear)h(a)g(better)g(choice)g(to)g(assess)i
(the)-9 4523 y(techniques.)73 b(Ho)n(we)n(v)o(er)35 b(since)i(this)h
(study)e(in)m(v)n(olv)o(es)g(only)1939 3163 y(one)24
b(split)g(of)g(the)g(data)g(set,)i(the)e(use)g(of)g(strati\002cation)f
(seems)1939 3262 y(preferable)e(to)h(the)g(increased)g(ef)n(fects)f(of)
h(chance)g(bought)e(on)1939 3362 y(by)g(resampling.)1939
3657 y Fe(4)120 b(Results)1939 3864 y Fc(4.1)100 b(Neural)25
b(netw)o(ork)1939 4025 y Fd(The)k(ultimately)f(selected)h(FFNN)h(w)o
(as)g(a)f(26-9-7)e(netw)o(ork,)1939 4125 y(with)17 b(the)f(logistic)h
(transfer)e(for)h(both)f(hidden)g(and)h(output)f(lay-)1939
4224 y(ers.)25 b(The)18 b(best)g(netw)o(ork)f(found)g(w)o(as)i(trained)
e(for)g(250)h(epochs)1939 4324 y(using)i(the)g(backpropagation)c
(algorithm)j(\(learning)f(rate)j(0.2,)1939 4424 y(momentum)h(0.9\).)36
b(All)25 b(26)e(v)n(ariables)h(pro)o(vided)d(were)j(used.)1939
4523 y(Half)i(of)f(the)g(training)f(data)h(\(T)m(raining)e(1\))i(w)o
(as)h(used)f(for)g(the)p 702 4719 2357 4 v 700 4819 4
100 v 1510 4819 V 1510 4819 V 2033 4789 a(Author)p 2788
4819 V 3057 4819 V 700 4919 V 752 4889 a(Data)c(set)p
1510 4919 V 1510 4919 V 581 w(1)182 b(2)141 b(3)f(4)99
b(5)141 b(6)g(7)p 2788 4919 V 99 w(T)-7 b(otal)p 3057
4919 V 702 4922 2357 4 v 700 5022 4 100 v 752 4992 a(T)m(raining)19
b(1)p 1510 5022 V 463 w(17)141 b(29)f(7)g(3)99 b(1)h(11)e(21)p
2788 5022 V 185 w(89)p 3057 5022 V 700 5121 V 752 5091
a(T)m(raining)19 b(2/V)-9 b(alidation)p 1510 5121 V 97
w(17)141 b(28)f(6)g(3)99 b(2)h(10)e(21)p 2788 5121 V
185 w(87)p 3057 5121 V 700 5221 V 752 5191 a(T)-6 b(esting)p
1510 5221 V 565 w(34)141 b(57)98 b(13)140 b(6)99 b(2)h(21)e(42)p
2788 5221 V 143 w(175)p 3057 5221 V 702 5224 2357 4 v
700 5324 4 100 v 752 5294 a(T)-7 b(otal)p 1510 5324 V
640 w(68)99 b(114)f(26)h(12)f(5)i(42)e(84)p 2788 5324
V 143 w(351)p 3057 5324 V 702 5327 2357 4 v 1501 5475
a(T)-7 b(able)20 b(2:)25 b(Data)c(set)g(splits)p eop
%%Page: 4 4
4 3 bop -9 74 a Fd(actual)30 b(training,)i(while)f(the)g(remainder)e
(\(T)m(raining)g(2\))i(w)o(as)-9 173 y(used)19 b(to)i(stop)f(training)f
(and)h(select)g(the)h(best)f(architecture.)115 280 y(T)-7
b(able)37 b(3)h(sho)n(ws)f(the)h(confusion)d(matrix)i(for)g(the)g(net-)
-9 380 y(w)o(ork')-5 b(s)25 b(predictions)f(on)i(the)g(testing)g(set.)
42 b(Those)26 b(programs)-9 479 y(that)16 b(were)g(correctly)e
(classi\002ed)j(are)f(sho)n(wn)f(as)i(box)o(ed)d(entries)-9
579 y(on)23 b(the)h(main)f(diagonal.)35 b(As)25 b(can)e(be)h(seen)g
(the)g(netw)o(ork)f(has)-9 678 y(a)30 b(high)g(classi\002cation)g(rate)
h(of)f(81.1\045.)54 b(Authors)30 b(tw)o(o)g(and)-9 778
y(three)e(are)h(ob)o(viously)e(distinct)i(from)f(all)h(others,)i(while)
e(the)-9 878 y(small)k(amount)e(of)h(data)h(a)n(v)n(ailable)f(for)g
(author)f(\002)n(v)o(e)h(seems)-9 977 y(lik)o(ely)21
b(to)h(be)f(responsible)f(for)h(all)h(of)g(those)f(programs)f(being)-9
1077 y(misclassi\002ed.)115 1184 y(Since)i(this)h(technique)d(w)o(as)j
(the)f(only)f(one)h(that)g(required)-9 1283 y(splitting)d(the)h
(training)f(data,)g(all)i(other)e(techniques)f(were)i(de-)-9
1383 y(v)o(eloped)d(using)i(both)f(training)h(data)g(sets)h(\(T)m
(raining)e(1)h(and)g(2\))-9 1483 y(and)27 b(just)h(the)f(\002rst)h
(50\045)g(\(T)m(raining)d(1\).)47 b(The)27 b(other)g(model-)-9
1582 y(ing)18 b(techniques)g(when)g(tuned)g(using)h(both)f(training)g
(data)h(sets)-9 1682 y(could)d(be)h(e)o(xpected)e(to)i(enjo)o(y)g(an)g
(adv)n(antage)e(o)o(v)o(er)h(the)h(neural)-9 1781 y(netw)o(ork)g(model)
h(in)g(terms)h(of)f(the)h(greater)e(number)m(,)g(and)h(thus)-9
1881 y(richness,)27 b(of)f(cases)h(a)n(v)n(ailable.)43
b(While)27 b(in)f(the)h(second)e(case)-9 1981 y(the)31
b(neural)f(netw)o(ork)g(models)h(should)f(ha)n(v)o(e)h(an)g(adv)n
(antage)-9 2080 y(since)21 b(the)o(y)g(are)g(tuned)g(on)g(the)g(same)h
(data)f(set)h(whilst)g(ha)n(ving)-9 2180 y(their)h(generalisability)e
(encouraged)g(by)i(the)g(use)h(of)f(the)g(v)n(al-)-9
2280 y(idation)i(set.)42 b Fa(x)p Fd(4.4)26 b(sho)n(ws)f(the)h
(performance)d(of)j(all)g(models)-9 2379 y(on)19 b(all)i(\(sub\)sets)f
(of)g(data.)-9 2656 y Fc(4.2)99 b(Multiple)25 b(discriminant)g
(analysis)-9 2825 y Fd(The)30 b(MD)m(A)h(w)o(as)h(a)f(stepwise)g(MD)m
(A)g(\(W)m(ilk')-5 b(s)31 b(lambda)f(w)o(as)-9 2924 y(used)20
b(for)f(entry)h(and)f(e)o(xit)h(of)g(v)n(ariables\).)k(Prior)c
(probabilities)-9 3024 y(were)27 b(obtained)f(from)g(the)i(data)f(and)g
(within)h(group)d(co)o(v)n(ari-)-9 3124 y(ance)17 b(matrices)h(were)h
(used.)24 b(As)19 b(discussed)f(in)g Fa(x)p Fd(4.1)f(both)h(sets)-9
3223 y(of)23 b(training)f(data)h(were)h(used)f(as)h(part)f(of)g(the)h
(model)e(param-)-9 3323 y(eter)g(tuning)g(since)h(no)g(model)f
(selection)g(process)h(w)o(as)g(used.)-9 3423 y(Another)f(model)i(w)o
(as)h(de)n(v)o(eloped)c(using)j(only)f(the)h(T)m(raining)-9
3522 y(1)c(data)g(set)h(\(50\045)f(of)g(the)g(training)g(data\).)25
b(See)20 b Fa(x)p Fd(4.4)g(for)g(these)-9 3622 y(results.)115
3729 y(T)-7 b(able)37 b(4)g(sho)n(ws)g(the)g(confusion)d(matrix)j(for)f
(the)h(pre-)-9 3828 y(dictions)29 b(made)g(on)g(the)g(with-held)g
(testing)g(data.)53 b(As)31 b(with)-9 3928 y(the)24 b(neural)f(netw)o
(ork)g(model)g(the)h(performance)d(accurac)o(y)i(is)-9
4027 y(81.1\045)31 b(when)h(using)g(all)h(training)f(data.)62
b(The)32 b(patterns)g(of)-9 4127 y(confusion)c(are)i(similar)g(for)g
(authors)f(four)m(,)i(six,)i(and)d(se)n(v)o(en)-9 4227
y(b)n(ut)20 b(rather)f(dif)n(ferent)g(for)g(the)h(other)g(authors.)-9
4503 y Fc(4.3)99 b(Case-based)25 b(r)n(easoning)-9 4672
y Fd(The)h(case-based)g(reasoning)g(model)g(w)o(as)i(de)n(v)o(eloped)c
(using)-9 4772 y(the)g(ANGEL)h(algorithm,)f(with)g(5)h(analogies)f(and)
g(weighted)-9 4872 y(means)32 b(for)h(case)g(aggre)o(gation.)61
b(T)m(ie)33 b(resolution)f(w)o(as)i(also)-9 4971 y(used.)39
b(All)26 b(v)n(ariables)f(were)g(normalized)e(in)i(order)f(to)i(main-)
-9 5071 y(tain)20 b(a)g(comparable)f(scale.)115 5178
y(All)i(26)g(v)n(ariables)e(were)i(used,)f(with)h(tw)o(o)g(models)f(de)
n(v)o(el-)-9 5277 y(oped)h(\226)h(one)g(using)g(only)f(50\045)h(of)g
(the)h(training)e(data)h(\(T)m(rain-)-9 5377 y(ing)29
b(1\))h(and)f(another)g(using)g(all)i(training)e(data)h(\(T)m(raining)e
(1)-9 5477 y(and)20 b(2\).)27 b(See)21 b Fa(x)p Fd(4.4)f(for)h(a)g
(discussion)f(of)h(the)g(performance)d(of)-9 5576 y(this)i
(reduced-data)e(model.)2064 74 y(T)-7 b(able)26 b(5)g(sho)n(ws)g(the)f
(confusion)f(matrix)i(for)f(the)h(testing)1939 173 y(data)g(set.)41
b(There)25 b(is)i(a)e(considerably)f(higher)g(le)n(v)o(el)h(of)g(accu-)
1939 273 y(rac)o(y)g(compared)e(to)j(the)f(neural)g(netw)o(ork)f(and)h
(discriminant)1939 372 y(analysis)k(models,)g(with)g(88.0\045)e
(accurac)o(y)g(achie)n(v)o(ed)f(when)1939 472 y(using)20
b(all)h(training)e(data.)1939 727 y Fc(4.4)100 b(Comparison)1939
888 y Fd(T)-7 b(able)20 b(6)g(sho)n(ws)g(the)g(results)g(for)f(all)i
(\002)n(v)o(e)e(models)g(de)n(v)o(eloped.)1939 988 y(Note)24
b(that)g(the)h(\223training)d(set\224)j(errors)e(for)g(the)i(CBR)g
(models)1939 1087 y(are)e(lea)n(v)o(e-one-out)d(since)j(the)g(case)g
(to)g(be)g(predicted)f(should)1939 1187 y(ob)o(viously)30
b(not)h(be)h(in)f(the)h(training)e(set.)60 b(As)33 b(can)e(be)h(seen)
1939 1287 y(the)f(results)h(for)e(the)h(FFNN)h(and)f(MD)m(A)g(models)f
(are)h(quite)1939 1386 y(remarkably)g(almost)i(identical)g(\(the)g
(FFNN)h(and)f(full-data)1939 1486 y(MD)m(A)h(are)f(in)h(f)o(act)g
(identical\).)64 b(Ho)n(we)n(v)o(er)m(,)35 b(each)e(of)g(these)1939
1585 y(models)27 b(made)f(rather)g(dif)n(ferent)f(patterns)h(of)g
(confusion)f(on)1939 1685 y(all)c(data)f(sets.)2064 1788
y(The)k(best)g(performing)e(technique)g(in)j(all)f(cases)h(is)h(case-)
1939 1887 y(based)19 b(reasoning.)k(In)c(terms)g(of)g(predicti)n(v)o(e)
f(performance)e(on)1939 1987 y(the)32 b(test)h(data)e(set,)k(its)e
(predictions)d(were)h(almost)h(7\045)g(bet-)1939 2087
y(ter)38 b(which)e(appears)g(to)h(be)g(a)h(useful)e(increase)h(in)g
(perfor)n(-)1939 2186 y(mance.)63 b(Ev)o(en)31 b(with)i(the)g(reduced)f
(training)f(data)i(set,)k(the)1939 2286 y(case-based)e(reasoning)f
(model)g(outperformed)e(the)k(neural)1939 2386 y(netw)o(ork)19
b(model)h(by)f(5.2\045.)2064 2488 y(This)k(is)h(suspected)e(to)h(be)g
(a)h(result)f(of)g(the)g(f)o(act)g(that)g(pro-)1939 2588
y(grammers)32 b(ha)n(v)o(e)g(more)g(than)g(one)g(style)i(of)e
(programming)1939 2687 y(leading)15 b(to)h(se)n(v)o(eral)f
(multi-dimensional)e(\223clouds\224)i(of)g(points.)1939
2787 y(Some)22 b(sets)h(of)e(programs)f(for)i(a)g(gi)n(v)o(en)e
(programmer)f(are)j(ap-)1939 2887 y(parently)31 b(within)i(other)e
(programmer')-5 b(s)30 b(\223clouds\224)i(of)g(met-)1939
2986 y(rics,)44 b(pre)n(v)o(enting)36 b(simple)i(e)o(xplicit)g
(classi\002cation)h(bound-)1939 3086 y(aries)21 b(from)e(properly)f
(classifying)i(the)g(systems.)1939 3383 y Fe(5)120 b(Conclusions)1939
3574 y Fd(The)34 b(use)h(of)f(the)g(proposed)e(set)j(of)f(metrics)h
(for)e(discrimi-)1939 3674 y(nating)22 b(between)f(se)n(v)o(en)h
(authors)f(sho)n(ws)h(promising)f(results,)1939 3774
y(especially)31 b(when)f(using)h(the)g(case-based)f(reasoning)f(tech-)
1939 3873 y(nique.)47 b(All)29 b(techniques)d(ho)n(we)n(v)o(er)g(pro)o
(vided)f(accurac)o(y)h(be-)1939 3973 y(tween)21 b(81.1\045)e(and)h
(88.0\045)g(on)g(a)h(holdout)e(testing)h(set)h(w)o(ould)1939
4073 y(be)c(certainly)f(encouraging)d(for)j(the)h(softw)o(are)f
(forensics)g(\002eld)1939 4172 y(as)21 b(a)g(whole.)2064
4275 y(It)50 b(is)g(tentati)n(v)o(ely)e(suggested)g(here)h(that)g(the)h
(nature)1939 4374 y(of)34 b(class)i(boundaries)c(for)i(forensic)f
(applications)g(is)i(more)1939 4474 y(amenable)20 b(to)h(modeling)e
(using)i(case-based)f(reasoning)f(than)1939 4574 y(partitioning)25
b(approaches.)43 b(The)26 b(idea)h(of)f(multiple)g(clusters)1939
4673 y(suggests)18 b(that)g(other)g(neural)f(netw)o(ork)f
(architectures)h(such)h(as)1939 4773 y(v)n(ariants)i(of)g(L)-8
b(VQ)20 b(could)f(be)h(fruitfully)f(applied)g(here.)2064
4876 y(W)-7 b(e)29 b(are)g(no)n(w)e(comparing)f(the)i(performance)e(of)
i(dif)n(fer)n(-)1939 4975 y(ent)20 b(sets)h(of)f(forensic)f(metrics,)g
(both)g(structural)g(and)h(stylistic)1939 5075 y(to)29
b(determine)e(which)h(are)g(the)g(most)g(useful)g(in)h(certain)f(cir)n
(-)1939 5175 y(cumstances.)54 b(Since)30 b(stylistic)h(metrics)f(are)g
(easier)h(to)f(f)o(ak)o(e)1939 5274 y(than)d(structural,)h(the)f
(ability)g(of)g(the)g(latter)g(to)h(discriminate)1939
5374 y(authorship)19 b(is)i(more)e(useful.)2064 5477
y(Another)h(area)i(of)g(interest)f(is)i(ho)n(w)e(each)h(technique)e
(per)n(-)1939 5576 y(forms)i(gi)n(v)o(en)e(certain)i(quantities)g(of)f
(data.)31 b(Whilst)23 b(the)f(CBR)p eop
%%Page: 5 5
5 4 bop 662 221 2437 4 v 660 320 4 100 v 1185 320 V 1598
290 a Fd(Predicted)19 b(author)g(number)p 2829 320 V
3097 320 V 660 420 V 1185 420 V 1334 390 a(1)197 b(2)g(3)183
b(4)f(5)197 b(6)g(7)p 2829 420 V 99 w(T)-7 b(otal)p 3097
420 V 662 423 2437 4 v 660 538 4 115 v 712 715 a(Actual)712
815 y(author)712 914 y(number)1095 508 y(1)p 1185 538
V 1236 426 140 4 v 1236 534 4 108 v 127 w(20)p 1372 534
V 1236 537 140 4 v 225 w(1)197 b(6)183 b(1)421 b(1)197
b(5)p 2829 538 4 115 v 185 w(34)p 3097 538 V 660 653
V 1095 623 a(2)p 1185 653 V 1475 541 140 4 v 1475 649
4 108 v 367 w(57)p 1611 649 V 1475 652 140 4 v 2829 653
4 115 v 1378 w(57)p 3097 653 V 660 767 V 1095 737 a(3)p
1185 767 V 1714 656 140 4 v 1714 763 4 108 v 606 w(13)p
1851 763 V 1714 766 140 4 v 2829 767 4 115 v 1139 w(13)p
3097 767 V 660 882 V 1095 852 a(4)p 1185 882 V 436 w(2)p
1980 771 98 4 v 1980 877 4 107 v 393 w(4)p 2075 877 V
1980 880 98 4 v 2829 882 4 115 v 957 w(6)p 3097 882 V
660 997 V 1095 967 a(5)p 1185 997 V 436 w(2)p 2204 885
98 4 v 2204 993 4 108 v 617 w(0)p 2299 993 V 2204 996
98 4 v 2829 997 4 115 v 733 w(2)p 3097 997 V 660 1112
V 1095 1082 a(6)p 1185 1112 V 197 w(1)g(2)g(1)p 2402
1000 140 4 v 2402 1107 4 108 v 576 w(17)p 2538 1107 V
2402 1110 140 4 v 2829 1112 4 115 v 452 w(21)p 3097 1112
V 660 1226 V 1095 1196 a(7)p 1185 1226 V 197 w(4)g(3)g(4)p
2641 1115 140 4 v 2641 1222 4 108 v 815 w(31)p 2777 1222
V 2641 1225 140 4 v 2829 1226 4 115 v 213 w(42)p 3097
1226 V 662 1230 2437 4 v 660 1329 4 100 v 968 1299 a(T)-7
b(otal)p 1185 1329 V 156 w(25)155 b(67)g(24)182 b(5)g(0)155
b(18)g(36)p 2829 1329 V 143 w(175)p 3097 1329 V 662 1333
2437 4 v 284 1480 a(T)-7 b(able)20 b(3:)26 b(Confusion)18
b(matrix)i(for)g(testing)g(data)g(predictions)e(from)i(FFNN)h(model)e
(using)h(all)g(training)f(data)p 662 2105 V 660 2204
4 100 v 1185 2204 V 1598 2174 a(Predicted)g(author)g(number)p
2829 2204 V 3097 2204 V 660 2304 V 1185 2304 V 1334 2274
a(1)197 b(2)g(3)183 b(4)f(5)197 b(6)g(7)p 2829 2304 V
99 w(T)-7 b(otal)p 3097 2304 V 662 2307 2437 4 v 660
2422 4 115 v 712 2599 a(Actual)712 2699 y(author)712
2798 y(number)1095 2392 y(1)p 1185 2422 V 1236 2310 140
4 v 1236 2418 4 108 v 127 w(26)p 1372 2418 V 1236 2421
140 4 v 225 w(1)646 b(3)197 b(1)g(3)p 2829 2422 4 115
v 185 w(34)p 3097 2422 V 660 2537 V 1095 2507 a(2)p 1185
2537 V 197 w(2)p 1475 2425 140 4 v 1475 2533 4 108 v
128 w(52)p 1611 2533 V 1475 2536 140 4 v 449 w(1)421
b(2)p 2829 2537 4 115 v 424 w(57)p 3097 2537 V 660 2651
V 1095 2621 a(3)p 1185 2651 V 197 w(1)197 b(2)p 1714
2540 140 4 v 1714 2647 4 108 v 128 w(10)p 1851 2647 V
1714 2650 140 4 v 2829 2651 4 115 v 1139 w(13)p 3097
2651 V 660 2766 V 1095 2736 a(4)p 1185 2766 V 436 w(2)p
1980 2655 98 4 v 1980 2761 4 107 v 393 w(4)p 2075 2761
V 1980 2764 98 4 v 2829 2766 4 115 v 957 w(6)p 3097 2766
V 660 2881 V 1095 2851 a(5)p 1185 2881 V 900 w(1)p 2204
2769 98 4 v 2204 2877 4 108 v 153 w(0)p 2299 2877 V 2204
2880 98 4 v 465 w(1)p 2829 2881 4 115 v 226 w(2)p 3097
2881 V 660 2996 V 1095 2966 a(6)p 1185 2996 V 197 w(2)g(2)g(1)p
2402 2884 140 4 v 2402 2992 4 108 v 576 w(16)p 2538 2992
V 2402 2995 140 4 v 2829 2996 4 115 v 452 w(21)p 3097
2996 V 660 3110 V 1095 3080 a(7)p 1185 3110 V 197 w(3)g(3)g(2)p
2641 2999 140 4 v 2641 3106 4 108 v 815 w(34)p 2777 3106
V 2641 3109 140 4 v 2829 3110 4 115 v 213 w(42)p 3097
3110 V 662 3114 2437 4 v 660 3213 4 100 v 968 3183 a(T)-7
b(otal)p 1185 3213 V 156 w(34)155 b(62)g(13)182 b(6)g(3)155
b(19)g(38)p 2829 3213 V 143 w(175)p 3097 3213 V 662 3217
2437 4 v 295 3364 a(T)-7 b(able)20 b(4:)26 b(Confusion)18
b(matrix)i(for)f(testing)i(data)f(predictions)e(from)i(MD)m(A)g(model)f
(using)h(all)g(training)f(data)p 662 3989 V 660 4088
4 100 v 1185 4088 V 1598 4058 a(Predicted)g(author)g(number)p
2829 4088 V 3097 4088 V 660 4188 V 1185 4188 V 1334 4158
a(1)197 b(2)g(3)183 b(4)f(5)197 b(6)g(7)p 2829 4188 V
99 w(T)-7 b(otal)p 3097 4188 V 662 4191 2437 4 v 660
4306 4 115 v 712 4483 a(Actual)712 4583 y(author)712
4682 y(number)1095 4276 y(1)p 1185 4306 V 1236 4194 140
4 v 1236 4302 4 108 v 127 w(28)p 1372 4302 V 1236 4305
140 4 v 225 w(1)197 b(2)885 b(3)p 2829 4306 4 115 v 185
w(34)p 3097 4306 V 660 4421 V 1095 4391 a(2)p 1185 4421
V 1475 4309 140 4 v 1475 4417 4 108 v 367 w(57)p 1611
4417 V 1475 4420 140 4 v 2829 4421 4 115 v 1378 w(57)p
3097 4421 V 660 4535 V 1095 4505 a(3)p 1185 4535 V 1714
4424 140 4 v 1714 4531 4 108 v 606 w(13)p 1851 4531 V
1714 4534 140 4 v 2829 4535 4 115 v 1139 w(13)p 3097
4535 V 660 4650 V 1095 4620 a(4)p 1185 4650 V 436 w(2)p
1980 4539 98 4 v 1980 4645 4 107 v 393 w(4)p 2075 4645
V 1980 4648 98 4 v 2829 4650 4 115 v 957 w(6)p 3097 4650
V 660 4765 V 1095 4735 a(5)p 1185 4765 V 436 w(1)p 2204
4653 98 4 v 2204 4760 4 107 v 617 w(1)p 2299 4760 V 2204
4763 98 4 v 2829 4765 4 115 v 733 w(2)p 3097 4765 V 660
4880 V 1095 4850 a(6)p 1185 4880 V 436 w(5)p 2402 4768
140 4 v 2402 4876 4 108 v 815 w(16)p 2538 4876 V 2402
4879 140 4 v 2829 4880 4 115 v 452 w(21)p 3097 4880 V
660 4994 V 1095 4964 a(7)p 1185 4994 V 197 w(1)197 b(5)g(2)183
b(2)p 2641 4883 140 4 v 2641 4990 4 108 v 590 w(32)p
2777 4990 V 2641 4993 140 4 v 2829 4994 4 115 v 213 w(42)p
3097 4994 V 662 4998 2437 4 v 660 5097 4 100 v 968 5067
a(T)-7 b(otal)p 1185 5097 V 156 w(29)155 b(71)g(17)182
b(6)g(1)155 b(16)g(35)p 2829 5097 V 143 w(175)p 3097
5097 V 662 5101 2437 4 v 318 5248 a(T)-7 b(able)20 b(5:)25
b(Confusion)19 b(matrix)g(for)h(testing)g(data)g(predictions)f(from)g
(CBR)j(model)d(using)h(all)h(training)e(data)p eop
%%Page: 6 6
6 5 bop 409 -6 2942 4 v 407 93 4 100 v 459 64 a Fd(Model)p
1469 93 V 843 w(T)m(raining)19 b(1)p 1914 93 V 99 w(T)m(raining)g(2)p
2359 93 V 99 w(T)m(raining)g(1)h(and)g(2)p 3007 93 V
99 w(T)-6 b(esting)p 3350 93 V 409 97 2942 4 v 407 196
4 100 v 459 167 a(MD)m(A)20 b(\(using)g(50\045)g(training\))p
1469 196 V 291 w(98.9\045)p 1914 196 V 229 w(79.3\045)p
2359 196 V 432 w(89.2\045)p 3007 196 V 127 w(84.6\045)p
3350 196 V 407 296 V 459 266 a(MD)m(A)g(\(using)g(100\045)f(training\))
p 1469 296 V 250 w(93.3\045)p 1914 296 V 229 w(85.1\045)p
2359 296 V 432 w(89.2\045)p 3007 296 V 127 w(81.1\045)p
3350 296 V 407 396 V 459 366 a(CBR)j(\(using)d(50\045)h(training\))p
1469 396 V 316 w(87.6\045)p 1914 396 V 229 w(81.6\045)p
2359 396 V 432 w(84.7\045)p 3007 396 V 127 w(86.3\045)p
3350 396 V 407 495 V 459 465 a(CBR)i(\(using)d(100\045)h(training\))p
1469 495 V 274 w(88.8\045)p 1914 495 V 229 w(80.6\045)p
2359 495 V 432 w(84.7\045)p 3007 495 V 127 w(88.0\045)p
3350 495 V 407 595 V 459 565 a(FFNN)h(\(using)f(100\045)f(training\))p
1469 595 V 228 w(98.9\045)p 1914 595 V 229 w(79.3\045)p
2359 595 V 432 w(89.2\045)p 3007 595 V 127 w(81.1\045)p
3350 595 V 409 598 2942 4 v 1162 746 a(T)-7 b(able)20
b(6:)25 b(Results)c(for)f(discriminating)e(models)-9
1096 y(models)k(were)g(better)g(here)h(it)g(w)o(ould)f(seem)h(lik)o
(ely)f(that)h(their)-9 1196 y(performance)c(w)o(ould)i(suf)n(fer)h
(more)f(from)g(losing)h(data)g(when)-9 1296 y(compared)g(to)j(models)f
(using)h(actual)f(classi\002cation)h(bound-)-9 1395 y(aries.)-9
1668 y Fe(Refer)n(ences)-9 1853 y Fd([1])40 b(A.)i(Gray)-5
b(,)46 b(P)-9 b(.)42 b(Sallis,)49 b(and)41 b(S.)h(MacDonell.)98
b(Identi-)129 1953 y(\002ed)20 b(\(inte)o(grated)e(dictionary-based)g
(e)o(xtraction)g(of)i(non-)129 2053 y(language-dependent)15
b(tok)o(en)20 b(information)e(for)i(forensic)129 2152
y(identi\002cation,)g(e)o(xamination,)f(and)h(discrimination\):)25
b(A)129 2252 y(dictionary-based)18 b(system)j(for)f(e)o(xtracting)f
(source)i(code)129 2352 y(metrics)28 b(for)g(softw)o(are)g(forensics.)
54 b(In)28 b Fb(Pr)l(oceedings)g(of)129 2451 y(SE:E&P'98)13
b(\(Softwar)m(e)i(Engineering:)20 b(Education)14 b(and)129
2551 y(Pr)o(actice)22 b(Confer)m(ence\))p Fd(,)f(pages)h(252\226259.)d
(IEEE)j(Com-)129 2650 y(puter)d(Society)h(Press,)h(1998.)-9
2800 y([2])40 b(I.)27 b(Krsul)f(and)g(E.)h(H.)g(Spaf)n(ford.)48
b(Authorship)25 b(analysis:)129 2900 y(Identifying)20
b(the)i(author)f(of)h(a)h(program.)33 b Fb(Computer)o(s)23
b(&)129 2999 y(Security)p Fd(,)c(16\(3\):233\226256,)c(1997.)-9
3149 y([3])40 b(P)-9 b(.)38 b(Sallis,)k(A.)c(Aakjaer)m(,)i(and)d(S.)h
(MacDonell.)83 b(Soft-)129 3248 y(w)o(are)35 b(forensics:)54
b(Old)35 b(methods)f(for)g(a)h(ne)n(w)g(science.)2078
1096 y(In)19 b Fb(Pr)l(oceedings)f(of)i(SE:E&P'96)d(\(Softwar)m(e)i
(Engineer)n(-)2078 1196 y(ing:)74 b(Education)43 b(and)h(Pr)o(actice\))
p Fd(,)50 b(pages)45 b(367\226371.)2078 1296 y(IEEE)19
b(Computer)g(Society)h(Press,)h(1996.)1939 1483 y([4])41
b(P)-9 b(.)28 b(Sallis,)j(S.)e(MacDonell,)g(G.)g(MacLennan,)f(A.)h
(Gray)-5 b(,)2078 1582 y(and)33 b(R.)i(Kilgour)-5 b(.)72
b(Identi\002ed:)52 b(Softw)o(are)34 b(authorship)2078
1682 y(analysis)f(with)g(case-based)f(reasoning.)68 b(In)33
b Fb(Pr)l(oceed-)2078 1781 y(ings)22 b(of)g(the)f(Addendum)f(Session)i
(of)g(the)g(1997)e(Interna-)2078 1881 y(tional)j(Confer)m(ence)g(on)h
(Neur)o(al)g(Information)e(Pr)l(ocess-)2078 1981 y(ing)e(and)g
(Intellig)o(ent)f(Information)g(Systems)p Fd(,)i(pages)g(53\226)2078
2080 y(56,)e(1998.)1939 2267 y([5])41 b(M.)17 b(Shepperd)d(and)j(C.)g
(Scho\002eld.)k(Estimating)16 b(softw)o(are)2078 2367
y(project)25 b(ef)n(fort)g(using)h(analogies.)48 b Fb(IEEE)26
b(T)-5 b(r)o(ansactions)2078 2467 y(on)19 b(Softwar)m(e)h(Engineering)p
Fd(,)e(23\(11\):736\226743,)d(1997.)1939 2654 y([6])41
b(E.)24 b(H.)g(Spaf)n(ford)e(and)i(S.)g(A.)h(W)-7 b(eeber)i(.)42
b(Softw)o(are)23 b(foren-)2078 2753 y(sics:)29 b(Can)22
b(we)g(track)f(code)g(to)g(its)i(authors?)41 b Fb(Computer)o(s)2078
2853 y(&)20 b(Security)p Fd(,)g(12:585\226595,)c(1993.)1939
3040 y([7])41 b(G.)23 b(Whale.)39 b(Softw)o(are)22 b(metrics)h(and)g
(plagiarism)f(detec-)2078 3139 y(tion.)d Fb(J)n(ournal)c(of)h(Systems)g
(and)f(Softwar)m(e)p Fd(,)i(13:131\226138,)2078 3239
y(1990.)p eop
%%Trailer
end
userdict /end-hook known{end-hook}if
%%EOF