# Copyright (C) 2008 Libresoft Research Group
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Library General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
#
# Authors : Israel Herraiz <herraiz@gsyc.escet.urjc.es>


import commands
import itertools
import os
import re
import subprocess
import sys

from mydatabase import MyDatabase

class MyApp:
    """Measure source files listed in the database and store the results.

    The list of files is read through the ``MyDatabase`` connection opened
    in the constructor.  The metrics are obtained by running external
    command line tools (``c_functions``, ``wc``, ``mccabe``, ``halstead``
    and ``kdsi``) on each file and parsing what they print.

    ``print`` is always called with one parenthesised argument so that the
    statements behave identically under Python 2 and also parse under
    Python 3.
    """

    # Number of leading lines of a file inspected by identifyAuto().
    HEADER_LINES = 50

    # Header phrases that mark a file as automatically generated.  They are
    # regular expressions, matched case-insensitively against each line.
    AUTO_PATTERNS = ("generated automatically",
                     "automatically generated",
                     "generated by",
                     "a lexical scanner generated by flex",
                     "this is a generated file",
                     "generated with the.*utility",
                     "do not edit",
                     "autogenerated",
                     "machine generated",
                     "produced by",
                     "automatically written",
                     "created automatically",
                     "automatically created",
                     "codepage for",
                     "mapping table",
                     "generated from",
                     "conversion table",
                     "generated with",
                     "scanner table")

    # Compiled once for the whole class instead of once per file and line.
    AUTO_REGEX = re.compile("|".join(AUTO_PATTERNS), re.IGNORECASE)

    def __init__(self,host,user,password,dbname):
        """Create the database object, set its credentials and connect."""

        self.db = MyDatabase()

        self.db.user = user
        self.db.password = password
        self.db.host = host
        self.db.name = dbname

        self.db.connect()

    def _fetchRows(self):
        """Yield rows from ``self.db.cursor_get`` until a false row is fetched.

        The next row is only fetched after the consumer has finished with
        the current one, so database writes done by the loop body happen
        before the following ``fetchone()`` call.
        """
        row = self.db.cursor_get.fetchone()
        while row:
            yield row
            row = self.db.cursor_get.fetchone()

    def _runTool(self,tool,filepath,*options):
        """Run ``tool [options] filepath`` and capture what it prints.

        The program is started from an argument list, without a shell, so
        quotes, ``$``, backticks and backslashes in ``filepath`` reach the
        tool untouched instead of being interpreted by the shell.

        Returns a ``(cmd, output)`` tuple.  ``cmd`` is a printable form of
        the command, used only for diagnostics.  ``output`` is the merged
        standard output and standard error with one trailing newline
        removed.  When the program cannot be started, ``output`` is the
        error message, so the callers' parsers fail and fall back to their
        usual error handling.
        """
        argv = [tool] + list(options) + [filepath]
        cmd = ' '.join(argv[:-1]) + ' "' + filepath + '"'

        try:
            # universal_newlines makes the pipes text mode, so the output
            # is a str on every Python version.
            proc = subprocess.Popen(argv,
                                    stdout=subprocess.PIPE,
                                    stderr=subprocess.STDOUT,
                                    universal_newlines=True)
            output = proc.communicate()[0]
        except (OSError, ValueError) as err:
            output = str(err)

        if output[-1:] == '\n':
            output = output[:-1]

        return cmd, output

    def run(self):
        """Measure every file selected by ``getCFiles`` and insert its metrics.

        Each row is expected to hold the file md5, its path and its SLOC
        count in the first three columns.
        """

        # Get files to be measured
        print("Querying,,,")
        self.db.getCFiles()
        for row in self._fetchRows():
            filemd5 = row[0]
            filepath = row[1]
            sloc = row[2]

            print("Measuring "+filepath+"...")

            c_funcs = self.measureFunctions(filepath)
            loc = self.measureLOC(filepath)
            cyclo, returns = self.measureMcCabe(filepath)
            h_length, h_volume, h_level, h_md = self.measureHalstead(filepath)
            blank_lines, comment_lines, num_comments = self.measureKDSI(filepath)

            results_dict = {'md5':filemd5,
                            'sloc':sloc,
                            'loc':loc,
                            'blk_lines':blank_lines,
                            'comment_lines':comment_lines,
                            'comments':num_comments,
                            'mccabe':cyclo,
                            'returns':returns,
                            'halstead_length':h_length,
                            'halstead_volume':h_volume,
                            'halstead_level':h_level,
                            'halstead_md':h_md,
                            'functions':c_funcs}

            self.db.insertMetrics(results_dict)

    def update(self):
        """Re-measure the Halstead metrics of the files from ``getCWrongFiles``.

        Only the Halstead values are recomputed; the other metrics are left
        as they are in the database.  A file whose measurement fails again
        (length of -1) is skipped.
        """

        # Get files with negative values
        print("Getting files with negative values...")
        self.db.getCWrongFiles()
        for row in self._fetchRows():
            filemd5 = row[0]
            filepath = row[1]
            sloc = row[2]

            print("Measuring "+filepath+" ("+filemd5+") size:"+str(sloc))
            if filepath.find(" ") != -1:
                print("****SPACES****")

            h_length, h_volume, h_level, h_md = self.measureHalstead(filepath)

            if h_length == -1:
                print("   Skipping")
                continue

            results_dict = {'md5':filemd5,
                            'halstead_length':h_length,
                            'halstead_volume':h_volume,
                            'halstead_level':h_level,
                            'halstead_md':h_md}

            self.db.updateMetrics(results_dict)

    def identifyAuto(self):
        """Flag the files whose first lines look automatically generated.

        For every file selected by ``getCFiles`` the first ``HEADER_LINES``
        lines are searched for ``AUTO_REGEX`` and the verdict is passed to
        ``writeAutoFile``.  Progress is printed every 10000 files and a
        summary at the end.  An error while opening or reading a file is
        not caught and stops the run.
        """
        # Get unique files paths
        print("Getting paths for unique files...")
        self.db.getCFiles()
        counter = 0
        total = 0
        for row in self._fetchRows():
            filemd5 = row[0]
            filepath = row[1]

            # Read only the header instead of loading the whole file, and
            # let the with block close the file even if reading fails.
            with open(filepath,'r') as fileobj:
                header = list(itertools.islice(fileobj, self.HEADER_LINES))

            auto = False
            for line in header:
                if self.AUTO_REGEX.search(line):
                    auto = True
                    break

            total += 1
            if auto:
                counter += 1

            if 0 == (total % 10000):
                print("%s files analyzed so far (%s of them are automated)" % (str(total),str(counter)))

            # Write this to database
            self.db.writeAutoFile(filemd5,auto)

        print("%s automated files (out of %s)" % (str(counter),str(total)))

    def measureFunctions(self,filepath):
        """Return the number of functions reported by ``c_functions``.

        The count is the last space separated token of the tool output.
        If that token is not an integer, the command and its output are
        printed and the program exits with status 1.
        """

        cmd, output = self._runTool('c_functions', filepath)

        try:
            funcs = int(output.split(' ')[-1])
        except ValueError:
            print(cmd)
            print(output)
            # Unlike the other metrics this one aborts the run; report the
            # abort with a failure status.
            sys.exit(1)

        return funcs

    def measureLOC(self,filepath):
        """Return the line count reported by ``wc -l``, or -1 on failure.

        On failure the command and its output are printed.
        """

        cmd, output = self._runTool('wc', filepath, '-l')

        try:
            loc = int(output.lstrip(' ').split(' ')[0])
        except ValueError:
            print(cmd)
            print(output)
            return -1

        return loc

    def measureMcCabe(self,filepath):
        """Return ``(complexity, returns)`` summed over the ``mccabe -n`` output.

        The last two tab separated fields of every output line are added
        up.  If any line cannot be parsed, ``(-1, -1)`` is returned without
        printing anything.
        """
        output = self._runTool('mccabe', filepath, '-n')[1]

        total_mccabe = 0
        total_returns = 0

        for line in output.split('\n'):
            fields = line.split('\t')

            try:
                mccabe = int(fields[-2])
                returns = int(fields[-1])
            except (IndexError, ValueError):
                return -1, -1

            total_mccabe += mccabe
            total_returns += returns

        return total_mccabe, total_returns

    def measureHalstead(self,filepath):
        """Return ``(length, volume, level, md)`` parsed from ``halstead``.

        The values are the tab separated fields 1 to 4 of the tool output.
        On failure the command and the split output are printed and all
        four values are -1.
        """
        cmd, output = self._runTool('halstead', filepath)

        output = output.split('\t')

        try:
            length = int(output[1])
            volume = int(output[2])
            level = float(output[3])
            md = int(output[4])
        except (IndexError, ValueError):
            print(cmd)
            print(output)
            length = -1
            volume = -1
            level = -1
            md = -1

        return length,volume,level,md

    def measureKDSI(self,filepath):
        """Return ``(blank_lines, comment_lines, comments)`` from ``kdsi``.

        The values are fields 1 to 3 of the tool output once it is split on
        spaces and the empty fields are dropped.  On failure the command
        and the fields are printed and all three values are -1.
        """
        cmd, output = self._runTool('kdsi', filepath)

        # Runs of spaces produce empty fields; drop them.
        output = [field for field in output.split(' ') if field != '']

        try:
            blank_lines = int(output[1])
            comment_lines = int(output[2])
            comments = int(output[3])
        except (IndexError, ValueError):
            print(cmd)
            print(output)
            blank_lines = -1
            comment_lines = -1
            comments = -1

        return blank_lines, comment_lines, comments

if __name__ == '__main__':

    # Connection settings of the metrics database.
    host, user = 'localhost', 'root'
    password = 'root'
    dbname = 'metrics_tse_herraiz'

    # MyApp takes (host, user, password, dbname) in this order.
    app = MyApp(host, user, password, dbname)

    # Only the detection of generated files is run; the measuring passes
    # below are kept disabled.
    #app.run()
    #app.update()
    app.identifyAuto()
