#!/usr/bin/python

# Analyze rsync logfile to list rsync users per rsync module
# 
# This script uses Python generators, which means that it never reads a whole
# log file into memory; lines are streamed through the processing stages.
# It works much like a Unix pipe.
# 
# It transparently opens uncompressed, gzip or bzip2 compressed files.
# 
# The implementation is based on David Beazley's great PyCon UK 2008 talk about
# generator tricks for systems programmers.
#
#
#
# Copyright 2008,2009,2010,2011,2012 Peter Poeml
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License version 2
# as published by the Free Software Foundation;
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA


# Module metadata.
__version__ = '2.0'
__author__ = 'Peter Poeml <poeml@cmdline.net>'
# Surname capitalised so it agrees with __author__ and the copyright header.
__copyright__ = 'Peter Poeml <poeml@cmdline.net>'
__license__ = 'GPLv2'
__url__ = 'http://mirrorbrain.org'


# 2008/09/19 15:26:53 [16972] rsync on opensuse-full from ftp1.yz.yamagata-u.ac.jp (133.24.255.159)

# Compatibility shims for interpreters that lack the set and sorted builtins.
# Merely naming a missing builtin raises NameError, which selects the fallback.
try:
    set
except NameError:
    from sets import Set as set     # Python 2.3 fallback

try:
    sorted
except NameError:
    def sorted(in_value):           # Python 2.3 fallback
        """A naive implementation of sorted.

        Copies in_value into a new list, sorts that list in place and
        returns it.  It accepts no arguments other than the iterable.
        """
        out_value = list(in_value)
        out_value.sort()
        return out_value

def gen_find(filepat, top):
    """Walk the directory tree rooted at top and yield the path of
    every file whose name matches the shell-style pattern filepat.

    Paths are produced lazily, in the order os.walk visits them, and
    are built by joining the containing directory with the file name."""
    import os
    import fnmatch
    for folder, _subdirs, entries in os.walk(top):
        for entry in entries:
            # Only the bare file name is tested, never the directory part.
            if fnmatch.fnmatch(entry, filepat):
                yield os.path.join(folder, entry)

def gen_open(filenames):
    """Yield one open, readable file object per entry in filenames.

    The opener is picked from the file name suffix: ".gz" is opened with
    gzip.open, ".bz2" with bz2.BZ2File, and anything else with the
    builtin open.  Each file is opened only when the consumer asks for
    it, using the opener's default mode."""
    import gzip
    import bz2
    compressed = (
        (".gz", gzip.open),
        (".bz2", bz2.BZ2File),
    )
    for name in filenames:
        opener = open       # default for names without a known suffix
        for suffix, candidate in compressed:
            if name.endswith(suffix):
                opener = candidate
                break
        yield opener(name)

def gen_cat(sources):
    """Chain several iterables into one flat stream.

    Every element of the first source is yielded, then every element of
    the second, and so on.  Each source is only started once the
    previous one is exhausted."""
    for source in sources:
        stream = iter(source)
        for element in stream:
            yield element


def gen_grep(pat, lines):
    """Filter lines, yielding only those in which the regular
    expression pat matches somewhere (re.search semantics).

    Matching lines are passed through unchanged and in their
    original order."""
    import re
    found_in = re.compile(pat).search
    for candidate in lines:
        if found_in(candidate) is None:
            continue
        yield candidate



def main():
    """
    Create a generator pipeline for the matching log file lines
    and process them.

    The log file names are taken from sys.argv[1:]; the script exits
    with a usage message when none are given.  For every line containing
    'rsync on ' the module name and the client (ip, host) pair are
    extracted, and the distinct clients are printed per module, with
    modules and clients both sorted.

    Lines that match but have too few fields to be parsed are skipped
    rather than aborting the whole report.
    """
    import sys

    filenames = sys.argv[1:]
    if not filenames:
        sys.exit('Usage: rsyncusers LOGFILE [LOGFILE ...]')

    pat = r'rsync on '

    # To scan a directory tree instead of taking names from the command
    # line, build the list with gen_find(), e.g.
    # gen_find('rsyncd.log', '/var/log').

    logfiles = gen_open(filenames)
    loglines = gen_cat(logfiles)
    patlines = gen_grep(pat, loglines)

    modules = dict()

    for line in patlines:

        # Expected layout (whitespace separated):
        #   DATE TIME [PID] rsync on MODULE[/PATH ...] from HOST (IP)
        words = line.split()
        if len(words) < 9:
            # truncated or otherwise unparsable line
            continue

        module = words[5]

        # Host and ip are the last two fields.  Taking them from the end
        # keeps them correct when the requested path contains whitespace,
        # which would shift fixed positions counted from the start.
        host, ip = words[-2], words[-1]

        # keep only up to the first slash
        module = module.split('/', 1)[0]

        # strip parens from ip
        ip = ip[1:-1]

        if module not in modules:
            modules[module] = set()

        modules[module].add((ip, host))

    # sys.stdout.write instead of the print statement, so that this
    # function is valid syntax on both Python 2 and Python 3; the output
    # is the same.
    write = sys.stdout.write
    for module in sorted(modules):
        write('\n')
        write(module + '\n')
        for client in sorted(modules[module]):
            # client is an (ip, host) tuple
            write('  %-16s %s' % client + '\n')


# Produce the report only when run as a script, not when imported.
if __name__ == '__main__':
    main()