function (data_dir = ".") {
  # Produces the structural measures and the raw defect count for each class
  # in KC1. The table is returned as an R object; it can be exported from R
  # as a csv file if desired (R manual at http://www.r-project.org).
  #
  # Args:
  #   data_dir: Directory that holds the four KC1 csv files read below.
  #     Defaults to the current working directory, which is where the files
  #     were always read from before this argument existed.
  #
  # Returns:
  #   A data frame holding the class-level metrics, the min/max/avg/sum of the
  #   method-level metrics per class, and NUMDEFECTS (defects summed over the
  #   class's methods). CLASS_ID itself is dropped. All merges below that use
  #   all = FALSE are inner joins, so only classes present in every merged
  #   table are kept.
  #
  # The input files can be obtained from http://mdp.ivv.nasa.gov

  read_kc1 <- function(file_name) {
    read.table(file.path(data_dir, file_name),
               header = TRUE, sep = ",", row.names = NULL)
  }

  defects <- read_kc1("KC1_defect_product_relations.csv")
  hierarchy <- read_kc1("KC1_product_hierarchy.csv")
  classmetrics <- read_kc1("KC1_product_class_metrics.csv")
  modulemetrics <- read_kc1("KC1_product_module_metrics.csv")

  # Eliminate the extra last column caused by the terminating commas in
  # KC1_product_class_metrics.csv and KC1_product_module_metrics.csv.
  classmetrics <-
    classmetrics[, seq_len(ncol(classmetrics) - 1L), drop = FALSE]
  modulemetrics <-
    modulemetrics[, seq_len(ncol(modulemetrics) - 1L), drop = FALSE]

  # For some reason the column that holds the class ids is named MODULE in
  # KC1_product_class_metrics.csv. Rename it to CLASS_ID.
  names(classmetrics)[1] <- "CLASS_ID"

  # Remove the columns that have mostly NAs. This step was decided after
  # seeing the data, hence the positional indices.
  modulemetrics <- modulemetrics[, -c(8, 9, 24:26)]

  # Defects at module level: number of defect records per MODULE_ID.
  defects <- aggregate.data.frame(defects[1L], list(defects$MODULE_ID), length)
  names(defects) <- c("MODULE_ID", "NUMDEFECTS")

  # Merge module metrics with the defects (module level). Modules without a
  # defect record come out of the left join as NA and are counted as 0.
  md <- merge(modulemetrics, defects,
              by.x = "MODULE", by.y = "MODULE_ID", all.x = TRUE)
  # Assign through the column vector: the former
  # md[is.na(...), ]$NUMDEFECTS <- 0 form fails with "replacement has 1 row,
  # data has 0" when no module is missing a defect count.
  md$NUMDEFECTS[is.na(md$NUMDEFECTS)] <- 0

  # Merge hierarchy with the class metrics.
  hc <- merge(hierarchy, classmetrics, by = "CLASS_ID", all = FALSE)

  # Finally merge class & method level measurements and defect data.
  hcmd <- merge(hc, md, by.x = "MODULE_ID", by.y = "MODULE", all = FALSE)

  # Class level defect data.
  # NOTE(review): column 36 is taken to be NUMDEFECTS and columns 15:35 the
  # method-level metrics; both depend on the layout of the KC1 files - confirm
  # if the inputs ever change.
  classdefects <- aggregate.data.frame(hcmd[, 36], list(hcmd$CLASS_ID), sum)
  names(classdefects) <- c("CLASS_ID", "NUMDEFECTS")

  # Aggregate the method metrics at the class level. The measures are min,
  # max, avg and sum; each aggregated column is named <measure><metric>.
  method_metrics <- hcmd[, 15:35]
  class_groups <- list(hcmd$CLASS_ID)
  measures <- list(min = min, max = max, avg = mean, sum = sum)
  aggregated <- lapply(names(measures), function(prefix) {
    agg <- aggregate.data.frame(method_metrics, class_groups,
                                measures[[prefix]])
    names(agg) <- c("CLASS_ID", paste0(prefix, names(method_metrics)))
    agg
  })

  # Merge the aggregated method metrics; column order is min, max, avg, sum.
  aggmethmet <- Reduce(
    function(left, right) merge(left, right, by = "CLASS_ID", all = FALSE),
    aggregated
  )

  # Add class level metrics.
  result <- merge(classmetrics, aggmethmet, by = "CLASS_ID", all = FALSE)

  # Add class level defect data.
  result <- merge(result, classdefects, by = "CLASS_ID", all = FALSE)

  # Finally remove CLASS_ID (the merge key, hence the first column); it is
  # not needed in the analysis.
  result[, -1L, drop = FALSE]
}