File Manager

Path: /opt/alt/python27/lib/python2.7/site-packages/postomaat/plugins/

Viewing File: suspect_collect.py

"""
The plugin extracts data from the configured backends
using query/task/mapping configurations provided
in the form of a file, a database, etc.

It sets additional tags on the suspect object;
those tags can then be accessed and used by other plugins.
"""

__version__ = "0.0.1"

import os
import re
from collections import defaultdict
from postomaat.extensions.sql import SQL_EXTENSION_ENABLED, get_session
from postomaat.shared import DUNNO, ScannerPlugin

if SQL_EXTENSION_ENABLED:
    from sqlalchemy.sql import select, column, table, text


class Query(object):
    """Container for one data-collection query definition.

    Instances are created empty and filled in by the code that parses the
    query configuration and builds the SQL statement.
    """

    def __init__(self):
        # Columns to select (empty list until a definition is assigned)
        self.columns = []
        # Name of the table to select from
        self.table = ''
        # Filters used to restrict the selection
        self.filters = []
        # Mapping of database column name -> suspect tag for selected columns
        self.column_fmap = {}
        # Mapping of database column name -> suspect attribute for filters
        self.filter_fmap = {}
        # Prepared statement object, None until it has been built
        self.statement = None

    def __str__(self):
        """Return a short human readable summary of the query definition."""
        # Use the '!s' conversion rather than the ':s' format spec: the
        # attributes may be lists, and list.__format__ rejects a non-empty
        # format spec on Python 3 (TypeError).
        return "<Query columns={columns!s} table={table!s} filters={filters!s}>".format(
            columns=self.columns,
            table=self.table,
            filters=self.filters
        )


class SuspectCollect(ScannerPlugin):
    """Scanner plugin that looks up data in a SQL backend and stores it as suspect tags.

    The queries to run are read from a text file (``queryfile``), one query
    definition per line::

        select columns=<fieldlist> from=<table> filters=<fieldlist>

    Empty lines and lines starting with ``#`` are ignored. See
    :meth:`get_fieldmap` for the ``<fieldlist>`` syntax.
    """

    # Syntax of one query definition line (see class docstring).
    _QUERY_LINE = re.compile(
        r'^select\s+columns=(?P<columns>[^\s]+)'
        r'\s+from=(?P<from>[^\s]+)'
        r'\s+filters=(?P<filters>[^\s]+)$'
    )

    # Suspect values that are read as plain attributes instead of get_value()
    _DIRECT_ATTRIBUTES = ('from_address', 'from_domain', 'to_address', 'to_domain')

    def __init__(self, config, section=None):
        ScannerPlugin.__init__(self, config, section)
        # NOTE(review): both options are read before `requiredvars` (and its
        # defaults) is declared below, so they presumably have to be present
        # in the configuration already -- confirm against ScannerPlugin.
        self.backendconfig = self.config.get(self.section, 'backendconfig')
        self.queryfile = self.config.get(self.section, 'queryfile')
        self.requiredvars = {
            "backendconfig": {
                'default': 'mysql://root@localhost/sender_meta_db?charset=utf8',
                'description': 'SQLAlchemy Connection string'
            },
            "queryfile": {
                'default': "/etc/postomaat/conf.d/suspectcollect.queries",
                'description': 'file with queriy configs to run for data collection'
            }
        }
        self.logger = self._logger()
        # Parsed and prepared queries; loaded lazily on the first examine() call
        self.queries = None

    def load_queries(self, queryconfig):
        """Parse the content of a query configuration file.

        Lines that are empty or start with ``#`` are skipped. Lines that do
        not match the expected syntax are logged as errors and skipped.

        :param queryconfig: full text of the query configuration (type: string)
        :return: list of Query objects with columns/table/filters set to the
                 raw strings found in the configuration (type: list)
        """
        queries = []
        for lineno, raw_line in enumerate(queryconfig.split('\n'), 1):
            line = raw_line.strip()
            if not line or line.startswith('#'):
                continue

            match = self._QUERY_LINE.match(line)
            if match is None:
                self.logger.error('cannot parse query config line %d', lineno)
                continue

            query = Query()
            query.columns = match.group('columns')
            query.table = match.group('from')
            query.filters = match.group('filters')
            queries.append(query)
        return queries

    def get_fieldmap(self, fieldstring):
        """
        Create a database column name to Postomaat suspect tag map from a
        comma separated field list. By default the database column name is
        the same as the suspect tag name; to override the mapping, a field
        can be written in the form:

        db_column_name:suspect_tag

        eg.
        select columns=order_id:orderid,last_login:lastlogin from=mailbox filters=address:sender

        Passing the ``columns`` value (1):
            - order_id:orderid,last_login:lastlogin
            - as a result select will:
                - select 'order_id' column from database as 'orderid' suspect tag
                - select 'last_login' column from database as 'lastlogin' suspect tag

        Passing the ``filters`` value (2):
            - address:sender
            - as a result select will:
                - filter by 'address' column in database using suspect tag 'sender' as value

        Empty fields (e.g. caused by a trailing comma) are ignored.

        :param fieldstring: comma separated list of fields (type: string)
        :return: dictionary mapping column name to tag name (type: dict)
        """
        fieldmap = {}
        for field in fieldstring.split(','):
            if not field:
                # "a,b," or "a,,b" would otherwise yield an empty column name
                continue
            db_column, separator, suspect_tag = field.partition(':')
            # Without an explicit ':' the column name doubles as the tag name
            fieldmap[db_column] = suspect_tag if separator else db_column
        return fieldmap

    def get_suspect_attribute(self, suspect, attribute):
        """Return the value of a suspect attribute.

        The address/domain attributes listed in ``_DIRECT_ATTRIBUTES`` are read
        directly from the suspect object, everything else is looked up through
        ``suspect.get_value()``. A warning is logged when the value is None.

        :param suspect: suspect object to read from
        :param attribute: name of the attribute (type: string)
        :return: attribute value, possibly None
        """
        if attribute in self._DIRECT_ATTRIBUTES:
            attribute_value = getattr(suspect, attribute)
        else:
            attribute_value = suspect.get_value(attribute)

        if attribute_value is None:
            self.logger.warning("Suspect does not have attribute %s. Typo or missing plugin?",
                                attribute)
        return attribute_value

    def build_query(self, columns=None, from_table=None, filters=None):
        """Build SQL select statement returning at most one row

        :param columns: column names to select (type: iterable of strings)
        :param from_table: table name to select from (type: string)
        :param filters: textual conditions which are all added to the
                        'where' clause (type: iterable of strings)
        :return: the statement object built with sqlalchemy's select()
        """
        stmt = (
            select()
            .with_only_columns([column(c) for c in columns])
            .select_from(table(from_table))
            .limit(1)
        )
        # `filters` defaults to None, which simply means "no where clause"
        for filter_string in filters or ():
            stmt = stmt.where(text(filter_string))
        return stmt

    def add_tags(self, suspect, query, db_result):
        """Copy the values of a result row into the suspect's tags.

        :param suspect: suspect object whose ``tags`` are updated
        :param query: Query whose ``column_fmap`` translates column names to tag names
        :param db_result: result row providing ``items()`` as (column, value) pairs
        """
        for db_column, value in db_result.items():
            tag = query.column_fmap[db_column]
            suspect.tags[tag] = value

    def lint(self):
        """Check that sqlalchemy is available and that checkConfig() passes.

        :return: True if all checks passed, False otherwise (type: bool)
        """
        if not SQL_EXTENSION_ENABLED:
            print("sqlalchemy is not installed")
            return False

        if not self.checkConfig():
            return False

        return True

    def _prepare_queries(self):
        """Load the query file and build the statement for every query in it.

        :return: list of ready-to-run Query objects, or None if the query
                 file does not exist
        """
        filename = self.queryfile
        if not os.path.exists(filename):
            self.logger.error('Query config file %s not found', filename)
            return None

        with open(filename) as filehandle:
            queries = self.load_queries(filehandle.read())

        for query in queries:
            query.column_fmap = self.get_fieldmap(query.columns)
            query.filter_fmap = self.get_fieldmap(query.filters)
            # Column names come from the query file; the values compared
            # against are passed as bound parameters (':<attribute>') at
            # execution time.
            conditions = [
                str(db_column) + ' = :' + str(tag)
                for db_column, tag in query.filter_fmap.items()
            ]
            query.statement = self.build_query(
                columns=query.column_fmap.keys(),
                filters=conditions,
                from_table=query.table
            )
        self.logger.info('Found %d query configurations', len(queries))
        return queries

    def examine(self, suspect):
        """Run every configured query for the suspect and store the results as tags.

        The query configuration is loaded on the first call and then reused.
        Failing queries and queries without a result are logged and skipped.

        :param suspect: suspect object to read filter values from and to tag
        :return: always DUNNO
        """
        if self.queries is None:
            # Only cache the queries once they are completely prepared, so a
            # failure while building statements is retried on the next call
            # instead of leaving queries without a statement behind.
            queries = self._prepare_queries()
            if queries is None:
                return DUNNO
            self.queries = queries

        session = get_session(self.backendconfig)

        for query in self.queries:
            # We GET attributes ("real" data of suspect), but SET tags (additional data)
            filter_data = {}
            for attribute in query.filter_fmap.values():
                filter_data[attribute] = self.get_suspect_attribute(suspect, attribute)

            try:
                db_result = session.execute(
                    query.statement,
                    filter_data
                ).fetchone()
            except Exception as err:
                # Deliberately broad: a failing lookup is logged and the
                # remaining queries are still attempted.
                self.logger.critical(
                    "Got exception while running query %s with parameters %s. Exception was: %s",
                    query.statement,
                    filter_data,
                    err
                )
                continue

            if db_result is None:
                self.logger.warning(
                    "Query %s did not return result with parameters %s. Not trying to add tags for this suspect.",
                    query.statement,
                    filter_data
                )
                continue

            self.add_tags(suspect, query, db_result)

        return DUNNO

    def __str__(self):
        return "SuspectCollect"