#!/usr/bin/python

#Author: Timur Pocheptsov, 17/03/2014.
#A script to generate rootcoreteam.h from git stats.

from urllib import urlopen
from HTMLParser import HTMLParser

#Fixed text of the generated rootcoreteam.h.
#header1 is everything that precedes the list of names: the include guard,
#the two namespaces, an explanatory C++ comment and the opening of the
#gROOTCoreTeam array. generate_rootheader() writes it first.
header1 = r"""#ifndef ROOT_ROOTCoreTeam
#define ROOT_ROOTCoreTeam

namespace ROOT {
namespace ROOTX {

//This file is automatically generated with names from http://root.cern.ch/gitstats/authors.html.
//The names are sorted in an alphabetical order (sorted by a family name).
//Please note the structure: it should be always like this - names as
//string literals in an array's initializer
//with a terminating 0 - that's what our rootxx.cxx and rootx-cocoa.mm expect.
//The array (or its elements actually) has an internal linkage
//(it has a definition here, not in rootxx.cxx or rootx-cocoa.mm files.
//Please, do not modify this file.

const char * gROOTCoreTeam[] = {
"""

#header2 closes what header1 opened: the terminating 0 of the array, both
#namespaces and the include guard. generate_rootheader() writes it after
#the names.
header2 = r"""
        0};
}
}

#endif
"""


class ParserState:
    """Integer states of the GitStatsParser state machine."""
    #The numeric order is significant: GitStatsParser compares states with
    #'<', '>' and '>=', so 'waiting' must stay the smallest value and
    #'done'/'errorFound' the two largest ones.
    (
        waiting,            #initial idle state, no <table> seen yet.
        started,            #<table> found, waiting for a header row.
        parsingHeaderRow,   #inside the header <tr>, between <th> cells.
        parsingHeaderCell,  #inside a <th> of the header row.
        parsingRow,         #header is parsed, now reading the table's rows.
        parsingCell,        #inside a <td> of a table row.
        done,               #stopped after the first table.
        errorFound,         #something bad happened.
    ) = range(8)


class GitStatsParser(HTMLParser) :
    """Collect the 'Author' column of the first <table> fed to the parser.

    The table has to start with a header row built from <th> cells, exactly
    one of which reads 'author' (compared case-insensitively, surrounding
    whitespace ignored). Every later row must contain as many <td> cells as
    the header has <th> cells. The stripped, non-empty text of the cell in
    the author column is appended to self.table.

    self.state holds a ParserState value. It becomes 'done' on the first
    </table> and 'errorFound' on any unexpected tag; in both cases all
    further input is ignored. Names collected before an error stay in
    self.table, so callers have to check self.state as well.
    """

    def __init__(self) :
        HTMLParser.__init__(self)

        self.state = ParserState.waiting
        self.table = []

    #a couple of aux. functions.
    def isParsing(self) :
        """Return True while we are inside the table (not waiting/done/error)."""
        #relies on the numeric order of the ParserState values.
        return ParserState.waiting < self.state < ParserState.done

    def isParsingCell(self) :
        """Return True while we are inside a <th> or a <td>."""
        return self.state in (ParserState.parsingHeaderCell, ParserState.parsingCell)

    def handle_starttag(self, tag, attrs):
        """React to an opening tag.

        Only the first <table> and the <tr>, <th>, <td> tags inside of it
        matter; they have to show up in the expected order and place.
        """
        if self.state >= ParserState.done :
            #done or error, nothing to do anymore.
            return

        if tag == 'table' :
            #only one table is accepted, and only while we are still waiting.
            if self.state == ParserState.waiting :
                self.state = ParserState.started
            else :
                self.state = ParserState.errorFound
            return

        if not self.isParsing() :
            #still waiting: tags outside of our table are ignored.
            return

        if tag == 'tr' :
            if self.state == ParserState.started :
                #the very first row is the header: <tr><th>...</th>...</tr>
                self.state = ParserState.parsingHeaderRow
                self.nFields = 0
                self.authorIndex = -1
            elif self.state == ParserState.parsingRow :
                #a data row, possible only after a valid header row.
                self.currentColumn = 0
            else :
                #<tr> in every other state is an error.
                self.state = ParserState.errorFound
        elif tag == 'td' or tag == 'th' :
            #<td> is valid only inside a data row, <th> only inside the header row.
            if tag == 'td' :
                expectedState = ParserState.parsingRow
                cellState = ParserState.parsingCell
            else :
                expectedState = ParserState.parsingHeaderRow
                cellState = ParserState.parsingHeaderCell

            if self.state == expectedState :
                self.cellData = ''
                self.state = cellState
            else :
                self.state = ParserState.errorFound

    def handle_endtag(self, tag) :
        """React to a closing tag: the first </table> or any </th>, </tr>, </td>."""
        if not self.isParsing() :
            #waiting, done or error.
            return

        if tag == 'table' :
            #</table> is expected only between rows, anything else is an error.
            if self.state == ParserState.parsingRow :
                self.state = ParserState.done
            else :
                self.state = ParserState.errorFound
        elif tag == 'th' :
            if self.state != ParserState.parsingHeaderCell :
                #</th> without a matching <th>.
                self.state = ParserState.errorFound
                return

            self.state = ParserState.parsingHeaderRow
            if self.cellData.strip().lower() == 'author' :
                if self.authorIndex != -1 :
                    #'Author' more than once.
                    self.state = ParserState.errorFound
                else :
                    self.authorIndex = self.nFields
            self.nFields += 1
        elif tag == 'tr' :
            #</tr> must close <tr> only.
            if self.state == ParserState.parsingHeaderRow :
                #a usable header has at least one field and an 'Author' column.
                if self.authorIndex != -1 and self.nFields :
                    self.state = ParserState.parsingRow
                else :
                    self.state = ParserState.errorFound
                self.currentColumn = 0
            elif self.state == ParserState.parsingRow :
                #every data row must have exactly as many cells as the header.
                if self.currentColumn != self.nFields :
                    self.state = ParserState.errorFound
            else :
                self.state = ParserState.errorFound
        elif tag == 'td' :
            #</td> must go after <td>.
            if self.state != ParserState.parsingCell :
                self.state = ParserState.errorFound
                return

            if self.currentColumn == self.authorIndex :
                #we got a name (empty cells are skipped).
                author = self.cellData.strip()
                if author :
                    self.table.append(author)
            self.currentColumn += 1
            self.state = ParserState.parsingRow

    def handle_data(self, data) :
        """Accumulate the text found inside the current <th> or <td>."""
        if self.isParsingCell() :
            self.cellData += data

#_____________________________________________________________________
def sort_predicate(x, y) :
    """Three-way comparison of two names, by family name where possible.

    If both x and y consist of exactly two space-separated words, the second
    words (family names) are compared first and the full names are used only
    to break a tie. In any other case the full strings are compared.

    Breaking ties by the full name keeps the generated list independent of
    the order in which equal family names arrive from the web page.

    Returns -1, 0 or 1 (x before y, equal, x after y), like Python 2's cmp.
    """
    def compare(a, b) :
        #same result as the Python 2 builtin cmp(a, b), but does not depend
        #on that builtin being available.
        return (a > b) - (a < b)

    #NOTE(review): names that are not exactly "First Family" (one word,
    #three and more words, double spaces) are ordered by the whole string.
    name1 = x.split(' ')
    name2 = y.split(' ')
    if len(name1) == 2 and len(name2) == 2 :
        byFamily = compare(name1[1], name2[1])
        if byFamily :
            return byFamily
    return compare(x, y)

#_____________________________________________________________________
def generate_rootheader(names) :
    """Write rootcoreteam.h into the current working directory.

    names is a sequence of strings; each one becomes a C string literal in
    the gROOTCoreTeam array, in the given order, between header1 and
    header2. Nothing is written (and no file is created) if names is empty.
    """
    if not names :
        return

    def literal(name) :
        #backslashes and double quotes would break (or alter) a C string
        #literal, so escape them; all other characters are copied as is.
        return '\t"' + name.replace('\\', '\\\\').replace('"', '\\"') + '"'

    #one literal per line; the comma after the last name precedes the
    #terminating 0 which header2 supplies.
    coreTeam = ',\n'.join([literal(name) for name in names]) + ','

    #'with' closes the file even if one of the writes fails.
    with open("rootcoreteam.h", "w") as output :
        output.write(header1)
        output.write(coreTeam)
        output.write(header2)

#_____________________________________________________________________
def main() :
    """Download the git statistics page and regenerate rootcoreteam.h.

    The author names are extracted with GitStatsParser, a few known problems
    in the list are fixed by hand, the names are sorted with sort_predicate
    and written by generate_rootheader. The header is written only if the
    page was parsed without errors and contained at least one name.

    This stays a best-effort tool: a failure never propagates to the caller,
    it is reported on stderr and the function simply returns.
    """
    import sys #local import: needed only for the diagnostics below.

    url = "http://root.cern.ch/gitstats/authors.html"
    try :
        html = urlopen(url).read()
        if not html :
            sys.stderr.write("rootcoreteam.h was not generated: empty reply from %s\n" % url)
            return

        parser = GitStatsParser()
        parser.feed(html)

        if parser.state == ParserState.errorFound or not parser.table :
            sys.stderr.write("rootcoreteam.h was not generated: no valid table of authors in %s\n" % url)
            return

        names = parser.table
        #fix some problems:
        if 'CristinaCristescu' in names :
            names.remove('CristinaCristescu')
            names.append('Cristina Cristescu')
        if 'Stefan Roiser' in names :
            names.remove('Stefan Roiser')
        #this name is added by hand; do not list it twice if the page has it.
        if 'Valeri Onuchine' not in names :
            names.append('Valeri Onuchine')
        names.sort(sort_predicate)
        generate_rootheader(names)
    except Exception as error :
        #Exception (not a bare except) lets Ctrl-C and sys.exit() through.
        sys.stderr.write("rootcoreteam.h was not generated: %s\n" % error)


#Run the generator only when this file is executed as a script,
#not when it is imported as a module.
if __name__ == '__main__' :
   main()