#!/usr/bin/python
#
# Utility for archiving/purging audit-log tables
#
# Copyright (c) 2013--2015 Red Hat, Inc.
#
#
# This software is licensed to you under the GNU General Public License,
# version 2 (GPLv2). There is NO WARRANTY for this software, express or
# implied, including the implied warranties of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE. You should have received a copy of GPLv2
# along with this software; if not, see
# http://www.gnu.org/licenses/old-licenses/gpl-2.0.txt.
#
# Red Hat trademarks are not licensed under GPLv2. No permission is
# granted to use or replicate Red Hat trademarks that are incorporated
# in this software or its documentation.
#

"""spacewalk-archive-audits - a tool for extracting and purging the _log tables in the SW database"""

import csv
import os
import sys
import time

from optparse import OptionParser
from datetime import datetime

try:
    from spacewalk.common import rhnLog
    from spacewalk.common.rhnConfig import CFG, initCFG
    from spacewalk.common.rhnLog import log_clean
    from spacewalk.server import rhnSQL
except ImportError:
    # pylint: disable=F0401
    _LIBPATH = '/usr/share/rhn'
    if _LIBPATH not in sys.path:
        sys.path.append(_LIBPATH)
    from server import rhnSQL
    from common import rhnLog
    from common.rhnLog import log_clean
    from common.rhnConfig import CFG, initCFG

LOG_LOCATION = '/var/log/rhn/archive-audit-tables.log'
LOG_COLS = ['id', 'stamp', 'user_id']

SQL_PIECES = {'oracle': {
    'COUNT': 'select count(*) from %(table)s',
    'FIND_COLS': "select column_name from user_tab_columns where table_name = :the_table",
    'FIND_TABLE': "select table_name from user_tables where table_name like '%|_LOG' escape '|'",
                  'GET_DATA': "select %(cols)s, %(tcols)s from %(tname)s t inner join log l on l.id = t.log_id where l.stamp <= to_date(:tstamp, 'YYYY-MM-DD hh24:mi:ss') order by l.id",
                  'PURGE_DATA' : """
                    delete %(ptable)s
                     where exists (
                       select *
                         from log l
                        where l.id = %(ptable)s.log_id
                          and l.stamp <= to_date(:tstamp, 'YYYY-MM-DD hh24:mi:ss')
                       )
                  """,
                  'PURGE_LOG_DATA': "delete from log l where l.stamp <= to_date(:tstamp, 'YYYY-MM-DD hh24:mi:ss')"
},
    'postgresql': {
    'COUNT': 'select count(*) from %(table)s;',
    'FIND_COLS': 'select column_name from information_schema.columns where table_name = :the_table;',
    'FIND_TABLE': "select table_name from information_schema.tables where table_name like '%!_log' escape '!';",
                  'GET_DATA': 'select %(cols)s, %(tcols)s from %(tname)s t inner join log l on l.id = t.log_id where l.stamp <= (timestamp :tstamp) order by l.id;',
                  'PURGE_DATA': 'delete from %(ptable)s t using log l where l.id = t.log_id and l.stamp <= (timestamp :tstamp);',
                  'PURGE_LOG_DATA': 'delete from log l where l.stamp <= (timestamp :tstamp);'
}
}


def setup_options():
    usage = 'usage: %prog --list --archive --purge --timestamp'
    parser = OptionParser(usage=usage)

    parser.add_option('-l', '--list', action='store_true', dest='list',
                      help='list number of log entries before --timestamp (or current size if --timestamp not specified), of all *_log tables')
    parser.add_option('-a', '--archive', action='store_true', default=False, dest='archive',
                      help='archive as CSV the contents of all *_log tables')
    parser.add_option('-p', '--purge', action='store_true', dest='purge',
                      help='purge all entries made on or before --timestamp')
    parser.add_option('-t', '--timestamp', action='store', dest='purge_date', metavar='YYYY-MM-DD hh24:mi:ss',
                      help='all entries made at or before this time will be purged')

    return parser


def validate_options():
    parser = setup_options()
    (options, args) = parser.parse_args()
    if not (options.list or options.archive or options.purge):
        parser.error("At least one of --list, --archive, --purge is required")

    if (options.archive or options.purge) and not options.purge_date:
        parser.error("--timestamp is required for --archive and --purge")

    if options.purge_date:
        try:
            time.strptime(options.purge_date, '%Y-%m-%d %H:%M:%S')
        except ValueError:
            parser.error('--timestamp [' + options.purge_date + '] not in format YYYY-MM-DD hh24:mi:ss')

    return (options, args)


def _log(stmt):
    logger = os.getenv("SUDO_USER")
    if logger == None:
        logger = 'root'
    log_clean(0, '%s|%s|%s' % (datetime.now().strftime('%Y-%m-%d %H:%M:%S'), logger, stmt))


def _get_sql(key):
    return SQL_PIECES.get(CFG.DB_BACKEND).get(key)


def _get_allcols(name):
    all_cols = []
    all_cols.extend(['l' + x for x in LOG_COLS])
    all_cols.extend(columns(name))
    return all_cols


def _get_data_statement(name):
    cols = ', '.join(['l.' + x + ' l' + x for x in LOG_COLS])
    tcols = ', '.join(['t.' + x for x in columns(name)])
    data_stmt = _get_sql('GET_DATA') % {"cols": cols, "tcols": tcols, "tname": name}
    return data_stmt


def tables():
    """What are the tables whose names fit the pattern *_log?"""
    stmt = rhnSQL.execute(_get_sql('FIND_TABLE'))
    table_names = stmt.fetchall() or []
    rc = []
    for name in table_names:
        rc.append(name[0])
    return rc


def columns(tbl):
    """What are the columns of table <tbl>?"""
    stmt = rhnSQL.execute(_get_sql('FIND_COLS'), the_table=tbl)
    col_names = stmt.fetchall() or []
    rc = []
    for name in col_names:
        rc.append(name[0])
    return rc


def count(tbl):
    """How many rows are in table <tbl>?"""
    tbl_str = _get_sql('COUNT') % {"table": tbl}
    stmt = rhnSQL.execute(tbl_str)
    num_rows = stmt.fetchone()
    return num_rows[0]


def list(opt):
    """List current state of all *_log tables found"""
    names = tables()
    for name in names:
        print '%32s : %d' % (name, count(name))


def archive(opt):
    """Grab the requested data and store it in a .csv file 'here'"""
    _log('Archiving everything before ' + opt.purge_date)
    names = tables()
    for name in names:
        rows = 0
        outfilename = name + '.' + (datetime.now().strftime('%Y-%m-%d_%H:%M:%S')) + '.csv'

        try:
            f = open(outfilename, 'wb')
        except IOError, e:
            print 'FAILED to open output file [' + outfilename + ']'
            sys.exit(1)

        writer = csv.writer(f)
        hdr_names = _get_allcols(name)
        writer.writerow(hdr_names)
        stmt = rhnSQL.execute(_get_data_statement(name), tstamp=opt.purge_date)
        rslts = None
        while 1:
            rslts = stmt.fetchone_dict()
            if not rslts:
                break
            row = []
            for col in hdr_names:
                row.append(rslts.get(col))
            writer.writerow(row)
            rows += 1
        print '%32s : %d rows archived' % (name, rows)
        f.close()


def purge(opt):
    """Remove from the _log tables anything that happened before <timestamp>"""
    _log('Purging everything before ' + opt.purge_date)
    names = tables()
    for name in names:
        del_str = _get_sql('PURGE_DATA') % {"ptable": name}
        stmt = rhnSQL.execute(del_str, tstamp=opt.purge_date)
        print '%32s : %s rows remain' % (name, count(name))
        _log('%32s : %s rows remain' % (name, count(name)))
    rhnSQL.execute(_get_sql('PURGE_LOG_DATA'), tstamp=opt.purge_date)
    return

if __name__ == '__main__':
    (options, args) = validate_options()
    initCFG('server')
    rhnSQL.initDB()

    if options.list:
        list(options)
        sys.exit(0)

    rhnLog.initLOG(LOG_LOCATION)
    if rhnLog.LOG.real == 0:
        print "Failed to access logfile " + LOG_LOCATION + " - exiting..."
        print "(Try running as sudo)"
        sys.exit(1)

    if options.purge:
        print 'Starting counts:'
        list(options)
        print '...archiving'
        rhnSQL.transaction('PURGE')
        archive(options)
        print '...purging'
        purge(options)
        rhnSQL.commit()
    elif options.archive:
        print 'Archiving...'
        archive(options)
