function (data_dir = ".")
{
  # Builds the class-level KC1 table: the class metrics, the method (module)
  # metrics aggregated per class as min/max/avg/sum, and the raw defect count
  # (NUMDEFECTS) of each class.
  #
  # Args:
  #   data_dir: directory holding the four KC1 csv files read below. Defaults
  #             to the current working directory.
  #
  # Returns:
  #   A data frame joined on CLASS_ID, with the CLASS_ID column removed. It can
  #   be exported from R as a csv file if needed (R manual at
  #   http://www.r-project.org).
  #
  # The input files can be obtained from http://mdp.ivv.nasa.gov

  # Reads one comma-separated KC1 file with a header row from data_dir.
  read_kc1 <- function(file_name) {
    read.table(file.path(data_dir, file_name), header = TRUE, sep = ",",
               row.names = NULL)
  }

  # Drops the last column of a data frame. seq_len() is used instead of
  # 1:n-1, which parses as (1:n)-1 and only works because index 0 is ignored.
  drop_last_column <- function(df) {
    df[, seq_len(ncol(df) - 1), drop = FALSE]
  }

  defects <- read_kc1("KC1_defect_product_relations.csv")
  hierarchy <- read_kc1("KC1_product_hierarchy.csv")
  classmetrics <- read_kc1("KC1_product_class_metrics.csv")
  modulemetrics <- read_kc1("KC1_product_module_metrics.csv")

  # The terminating commas in KC1_product_class_metrics.csv and
  # KC1_product_module_metrics.csv create an extra trailing column; remove it.
  classmetrics <- drop_last_column(classmetrics)
  modulemetrics <- drop_last_column(modulemetrics)

  # The column holding the class ids is named MODULE in
  # KC1_product_class_metrics.csv. Rename it to CLASS_ID.
  names(classmetrics)[1] <- "CLASS_ID"

  # Remove the columns that have mostly NAs. These positions were chosen
  # after inspecting the data.
  modulemetrics <- modulemetrics[, -c(8, 9, 24:26)]

  # Count the defect records per module.
  defects <- aggregate.data.frame(defects, list(defects$MODULE_ID),
                                  length)[, 1:2]
  names(defects)[1:2] <- c("MODULE_ID", "NUMDEFECTS")

  # Merge module metrics with the defects (module level). Modules without any
  # defect record come out of the left join as NA and are set to 0.
  md <- merge(modulemetrics, defects, by.x = "MODULE", by.y = "MODULE_ID",
              all.x = TRUE)
  # Index the column vector directly: the former md[rows, ]$NUMDEFECTS <- 0
  # form stops with "replacement has 1 row, data has 0" when no row is NA.
  md$NUMDEFECTS[is.na(md$NUMDEFECTS)] <- 0

  # Merge hierarchy with the class metrics.
  hc <- merge(hierarchy, classmetrics, by = "CLASS_ID", all = FALSE)

  # Merge class and method level measurements with the defect data.
  hcmd <- merge(hc, md, by.x = "MODULE_ID", by.y = "MODULE", all = FALSE)

  # Positions inside hcmd of the method metrics and of the defect count.
  # NOTE(review): assumes the KC1 file layout puts the method metrics in
  # columns 15:35 and NUMDEFECTS in column 36 of hcmd - confirm against data.
  method_cols <- 15:35
  defect_col <- 36

  # Class level defect data: sum of the module defect counts per class.
  classdefects <- aggregate.data.frame(hcmd[, defect_col],
                                       list(hcmd$CLASS_ID), sum)
  names(classdefects)[1:2] <- c("CLASS_ID", "NUMDEFECTS")

  # Aggregate the method metrics at the class level, once per summary
  # statistic. The list names become the column name prefixes.
  summaries <- list(min = min, max = max, avg = mean, sum = sum)
  per_stat <- lapply(names(summaries), function(prefix) {
    agg <- aggregate.data.frame(hcmd[, method_cols], list(hcmd$CLASS_ID),
                                summaries[[prefix]])
    names(agg) <- c("Group.1", paste0(prefix, names(hcmd)[method_cols]))
    agg
  })

  # Join the four aggregates on the grouping key; the resulting column order
  # is min, max, avg, sum.
  aggmethmet <- Reduce(
    function(left, right) merge(left, right, by = "Group.1", all = FALSE),
    per_stat
  )
  names(aggmethmet)[1] <- "CLASS_ID"

  # Add class level metrics, then class level defect data.
  result <- merge(classmetrics, aggmethmet, by = "CLASS_ID", all = FALSE)
  result <- merge(result, classdefects, by = "CLASS_ID", all = FALSE)

  # Remove CLASS_ID (the first column); it is not needed in the analysis.
  result <- result[, -1, drop = FALSE]

  result
}
