# Copyright (C) 2008 Libresoft Research Group
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Library General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
#
# Authors : Israel Herraiz <herraiz@gsyc.escet.urjc.es>

import commands
import os
import subprocess
import sys
import tempfile

from mydatabase import MyDatabase

class MyApp:
    """Print correlation coefficients between SLOC and other metrics.

    For every category returned by the database, the metrics of its
    header files, non-header files and all files are dumped to a data
    file, and R is used to compute the correlation between column V1
    (SLOC) and a set of complexity metric columns.
    """

    # File sets requested from the database for every category.
    FILE_TYPES = ('header','nonheader','global')

    # Columns of the data file correlated against V1 (SLOC), in output
    # order: McCabe, Halstead length, Halstead volume, Halstead level,
    # Hmd and number of functions.
    METRIC_COLUMNS = (6, 8, 9, 10, 11, 12)

    # Files with at least this many SLOC form the "outliers" sample;
    # smaller files form the "filtered" sample.
    OUTLIER_SLOC_THRESHOLD = 11900

    def __init__(self,host,user,password,dbname):
        """Configure the database connection and open it.

        host, user, password and dbname are copied to the MyDatabase
        object before its connect() method is called.
        """
        self.db = MyDatabase()

        self.db.user = user
        self.db.password = password
        self.db.host = host
        self.db.name = dbname

        self.db.connect()

    def run(self):
        """Print one line per category with all its coefficients.

        Each line holds the category name followed by the coefficients
        of the header, non-header and global file sets, separated by
        single spaces (with a trailing space).
        """
        # Every row returned by the database carries the category name
        # in its first field.
        categories = [c[0] for c in self.db.getListOfCategories()]

        for c in categories:
            fields = [c]

            for t in self.FILE_TYPES:
                # Only a file *name* is reserved here, the file itself
                # is not created.
                # NOTE(review): getMetricsForCategory is not visible from
                # this file; it may require a path that does not exist
                # yet, which is why mkstemp is not used here -- confirm.
                tempfilename = os.tempnam('/tmp','metrics_')

                try:
                    # The metrics are written to the temp file...
                    self.db.getMetricsForCategory(c,tempfilename,t)

                    # ...and read back by R to get the coefficients
                    coeffs = self.getCoefficients(tempfilename)
                finally:
                    # The data file is no longer needed once R has run
                    self._removeQuietly(tempfilename)

                fields.append(' '.join(coeffs))

            print(' '.join(fields)+' ')

    def getCoefficients(self,datafilename):
        """Return the correlation coefficients computed by R.

        datafilename is the path of a table readable by R's read.table.
        The result is a list of strings, as printed by R, with one
        coefficient per metric column for the whole sample, then for
        the filtered sample and finally for the outliers sample.

        Lines of R output that are not printed values are skipped, so
        the list is shorter (possibly empty) when R fails. R writes its
        warnings and errors to stderr, which is left untouched so they
        remain visible without being mistaken for coefficients.

        Raises OSError if the R executable cannot be started.
        """
        code = self._buildRCode(datafilename)

        # mkstemp creates the file atomically, avoiding the race between
        # choosing a name and opening it.
        fd, codefilename = tempfile.mkstemp(prefix='herraiz_rcode_',
                                            suffix='.R')
        try:
            codefile = os.fdopen(fd,'w')
            try:
                codefile.write(code)
            finally:
                codefile.close()

            # An argument list is used so no shell parses the file name.
            # universal_newlines makes the output text rather than bytes.
            process = subprocess.Popen(
                ['R','--vanilla','--slave','-f',codefilename],
                stdout=subprocess.PIPE,
                universal_newlines=True)
            output = process.communicate()[0]
        finally:
            os.remove(codefilename)

        return self._parseROutput(output)

    @classmethod
    def _buildRCode(cls,datafilename):
        """Return the R script printing all the correlations of a file."""
        threshold = cls.OUTLIER_SLOC_THRESHOLD

        # Pairs of (sample name, R statement defining the sample). The
        # whole sample needs no statement: it is the loaded dataset.
        samples = (
            ('dataset', None),
            ('fdataset', "fdataset <- dataset[dataset$V1<%d,]" % threshold),
            ('odataset', "odataset <- dataset[dataset$V1>=%d,]" % threshold),
        )

        lines = [
            "dataset <- read.table('"+datafilename+"');",
            # Keep only rows with non-negative V11 and positive V1
            "dataset<-dataset[dataset$V11>=0,]; dataset<-dataset[dataset$V1>0,];",
            "",
        ]

        for name, definition in samples:
            if definition is not None:
                lines.append(definition)
            for column in cls.METRIC_COLUMNS:
                lines.append("print(cor(%s$V1, %s$V%d),digits=2);"
                             % (name, name, column))
            lines.append("")

        return '\n'.join(lines)

    @staticmethod
    def _parseROutput(output):
        """Return the values of the '[1] value' lines found in output."""
        coeffs = []

        for line in output.splitlines():
            tokens = line.split()
            # R prints a single value as "[1] value"; anything else
            # (blank lines, diagnostics) is not a coefficient.
            if len(tokens) >= 2 and tokens[0] == '[1]':
                coeffs.append(tokens[1])

        return coeffs

    @staticmethod
    def _removeQuietly(filename):
        """Delete filename, ignoring the error if it cannot be deleted."""
        try:
            os.remove(filename)
        except OSError:
            # Best effort: the file may not exist or may belong to
            # another user; failing to clean up must not abort the run.
            pass
            
if __name__ == '__main__':

    # Connection settings of the metrics database
    settings = {
        'host': 'localhost',
        'user': 'root',
        'password': 'root',
        'dbname': 'metrics',
    }

    MyApp(**settings).run()

