#!/usr/bin/env python3
# Implements https://gitweb.torproject.org/torspec.git/tree/dir-spec.txt 
# Copyright (C) 2018 Libor Polčák <ipolcak@fit.vutbr.cz>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.


from onion_router import onion_router
from snapshot import Snapshot
from time_parser import TimeWrapper, FormatTimeWrapper
from ipfiles import create_ip_filename
from geolite2 import geolite2_accessor

import argparse
import os
import subprocess
import sys
import time

# Delay in seconds used with --update_keeprunning: the next consensus is fetched
# 10 minutes after its expected time because consensuses are not available
# immediately.
WAIT_FOR_CONSENSUS = 600

def create_snapshot_from_consensus_file(fname):
    """Parse a consensus file into a Snapshot.

    The lines of the file are split into the document-level part (everything
    before the first router entry and everything from the footer/signatures
    on) and one group of lines per router entry. A router entry starts with
    a line beginning with "r ".

    fname -- path of the consensus file to read.

    Returns a Snapshot built from the document-level lines and one
    onion_router per router entry. Errors raised by open() (for example
    FileNotFoundError) propagate to the caller.
    """
    obj = []
    orouters = []
    current = obj
    # The context manager guarantees that the file is closed, even when
    # reading fails in the middle of the document.
    with open(fname) as lines:
        for line in lines:
            if line.startswith("r "):
                current = []
                orouters.append(current)
            elif line.startswith(("directory-footer", "directory-signature")):
                # Older consensus files do not carry directory-footer but they
                # have multiple directory-signatures
                current = obj
            current.append(line)
    return Snapshot(obj, [onion_router(orouter) for orouter in orouters])

def write_preprocessed_from_consensus_file(fname, preprocess_abspath, last):
    """Preprocess one consensus file if it is newer than the last one seen.

    fname -- path of the consensus file.
    preprocess_abspath -- directory receiving the preprocessed output.
    last -- valid-after value of the newest consensus preprocessed so far.

    Returns the valid-after value of this consensus when it was written,
    otherwise the unchanged last.
    """
    parsed = create_snapshot_from_consensus_file(fname)
    if not parsed.get_valid_after() > last:
        # Not newer than what has already been preprocessed, nothing to do
        return last
    write_preprocessed(preprocess_abspath, parsed)
    return parsed.get_valid_after()

def preprocess_directory_consensus(abspath, preprocess_abspath, last):
    """Preprocess every entry of a directory in sorted name order.

    abspath -- directory whose entries are to be preprocessed.
    preprocess_abspath -- directory receiving the preprocessed output.
    last -- valid-after value of the newest consensus preprocessed so far.

    Returns the value of last after all entries have been handled.
    """
    for entry in sorted(os.listdir(abspath)):
        entry_path = "%s/%s" % (abspath, entry)
        last = preprocess_path_consensus(entry_path, preprocess_abspath, last)
    return last

def preprocess_path_consensus(abspath, preprocess_abspath, last):
    """Preprocess a consensus file or, recursively, a directory of them.

    Failures are reported on stderr and do not abort the processing; in that
    case last is returned unchanged.

    abspath -- file or directory to preprocess.
    preprocess_abspath -- directory receiving the preprocessed output.
    last -- valid-after value of the newest consensus preprocessed so far.

    Returns the updated value of last.
    """
    try:
        if os.path.isdir(abspath):
            return preprocess_directory_consensus(abspath, preprocess_abspath, last)
        try:
            return write_preprocessed_from_consensus_file(abspath, preprocess_abspath, last)
        except Exception as err:
            # A single broken file must not stop the whole run
            sys.stderr.write("%s: %s\n" % (abspath, str(err)))
    except OSError as err:
        sys.stderr.write("%s\n" % str(err))
    return last

def get_consensus_fullpath(consensus_abspath, t):
    """Compute where the consensus of the hour containing t is stored.

    consensus_abspath -- root directory of the consensus files.
    t -- UNIX timestamp; it is interpreted in UTC.

    Returns a tuple (directory, file name, full path of the file).
    """
    utc = time.gmtime(t)
    day_subdir = time.strftime("consensuses-%Y-%m/%d", utc)
    directory = "%s/%s" % (consensus_abspath, day_subdir)
    # Minutes and seconds are fixed to zero in the file name
    fname = time.strftime("%Y-%m-%d-%H-00-00-consensus", utc)
    fullpath = "%s/%s" % (directory, fname)
    return (directory, fname, fullpath)

def download_consensus_if_missing(consensus_abspath, t):
    """Make sure the consensus for time t is stored locally, if obtainable.

    The target directory is created when needed. When the file is not
    present, wget is started in that directory to fetch the file; its output
    is discarded and its result is not inspected.

    consensus_abspath -- root directory of the consensus files.
    t -- UNIX timestamp of the wanted consensus.

    Returns the full path where the consensus file is expected.
    """
    directory, fname, fullpath = get_consensus_fullpath(consensus_abspath, t)
    os.makedirs(directory, exist_ok=True)
    if os.path.isfile(fullpath):
        return fullpath
    url = "https://collector.torproject.org/recent/relay-descriptors/consensuses/%s" % fname
    subprocess.run(["wget", url], cwd=directory,
            stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
    return fullpath

def fixup_missing_consensus(consensus_abspath, preprocess_abspath, t):
    """Insert the data of a consensus that was skipped into preprocessed files.

    For every IP address of the consensus for time t, the preprocessed file
    of that address is read, the entry of the consensus is inserted before
    the first stored entry that is valid after t (or at the end when there is
    no such entry), and the file is rewritten.

    consensus_abspath -- root directory of the consensus files.
    preprocess_abspath -- directory with the preprocessed output.
    t -- UNIX timestamp of the consensus to be inserted.
    """
    consensus_file = get_consensus_fullpath(consensus_abspath, t)[2]
    snapshot = create_snapshot_from_consensus_file(consensus_file)
    # Make sure that we do not initiate DNS queries in the future
    onion_router.request_additional = False
    for addr in snapshot.get_ipaddrs():
        or_filename = create_ip_filename(preprocess_abspath, addr, create_path=True)
        stored = parse_ors_from_file(or_filename)
        missing = snapshot.get_onion_router(addr)
        # Position of the first newer entry; append when all entries are older
        position = next(
                (i for i, known in enumerate(stored)
                    if known.get_inconsensus_val_after_unixtime() > t),
                len(stored))
        stored.insert(position, missing)
        with open(or_filename, "w") as out:
            for entry in stored:
                entry.append_lines(out)

def get_last_preprocessed_consensus(preprocess_abspath):
    """Read the timestamp stored in the "last" file of a preprocessed directory.

    preprocess_abspath -- directory with the preprocessed output.

    Returns the integer on the first line of "<preprocess_abspath>/last", or
    0 when the file cannot be read or does not start with an integer.
    """
    try:
        with open("%s/last" % preprocess_abspath) as f:
            return int(f.readline())
    except (OSError, ValueError):
        # OSError: missing or unreadable file; ValueError: content that is
        # not an integer (or cannot be decoded). Anything else, for example
        # KeyboardInterrupt, is intentionally not swallowed.
        return 0

def process_path_consensus(consensus_abspath, preprocess_abspath):
    """Preprocess consensuses found at a path, resuming after the last run.

    consensus_abspath -- consensus file or directory with consensus files.
    preprocess_abspath -- directory receiving the preprocessed output.
    """
    resume_from = get_last_preprocessed_consensus(preprocess_abspath)
    preprocess_path_consensus(consensus_abspath, preprocess_abspath, resume_from)

def update_path_consensus(consensus_abspath, preprocess_abspath):
    """Download and preprocess hourly consensuses up to the current time.

    Starts one hour after the last preprocessed consensus and advances in
    one hour steps. Stops early when a consensus file is not found.

    consensus_abspath -- root directory of the consensus files.
    preprocess_abspath -- directory receiving the preprocessed output.

    Returns the timestamp of the first consensus that was not processed.
    """
    hour = 3600
    next_t = get_last_preprocessed_consensus(preprocess_abspath) + hour
    while next_t < time.time():
        try:
            fetched = download_consensus_if_missing(consensus_abspath, next_t)
            write_preprocessed_from_consensus_file(fetched, preprocess_abspath, 0)
        except FileNotFoundError:
            # The consensus is not available, the caller may retry later
            break
        next_t += hour
    return next_t

def write_preprocessed(preprocess_abspath, consensus_snapshot):
    """Append the routers of a snapshot to the per IP address files.

    The output directory is created when it does not exist. Afterwards the
    valid-after value of the snapshot is stored in the file "last" in that
    directory.

    preprocess_abspath -- directory receiving the preprocessed output.
    consensus_snapshot -- snapshot whose routers are to be stored.

    Raises NotImplementedError when preprocess_abspath exists but is not a
    directory.
    """
    if not os.path.exists(preprocess_abspath):
        os.makedirs(preprocess_abspath)
    elif not os.path.isdir(preprocess_abspath):
        raise NotImplementedError("%s exists but it is not a directory" % preprocess_abspath)
    for addr in consensus_snapshot.get_ipaddrs():
        router_file = create_ip_filename(preprocess_abspath, addr, create_path=True)
        with open(router_file, "a") as out:
            consensus_snapshot.get_onion_router(addr).append_lines(out)
    last_marker = "%s/%s" % (preprocess_abspath, "last")
    with open(last_marker, "w") as out:
        out.write("%d\n" % consensus_snapshot.get_valid_after())

def parse_ors_from_file(fname):
    """Read the onion routers stored in a preprocessed file.

    A new router record starts with each line beginning with "r "; all
    following lines up to the next such line belong to the same record.

    fname -- path of the preprocessed file.

    Returns the list of onion_router objects in file order. The list is
    empty when the file does not exist.
    """
    parsed = []
    pending = []
    try:
        with open(fname) as source:
            for line in source:
                if pending and line.startswith("r "):
                    # The previous record is complete, start a new one
                    parsed.append(onion_router(pending))
                    pending = [line]
                else:
                    pending.append(line)
            if pending:
                parsed.append(onion_router(pending))
    except FileNotFoundError:
        pass # Return what has been collected, typically an empty list
    return parsed

def add_geolite_data_to_onion_routers(ipaddr, ors, geolite_acc):
    """Attach geolite data to each of the given onion routers.

    Nothing is done when geolite_acc is falsy. A FileNotFoundError raised
    for a router is reported on stderr, one line per failure, and the
    remaining routers are still processed.

    ipaddr -- IP address passed to append_geolite_data of each router.
    ors -- iterable of routers to be extended in place.
    geolite_acc -- accessor of the geolite data or a falsy value.
    """
    if not geolite_acc:
        return
    for r in ors:
        try:
            r.append_geolite_data(ipaddr, geolite_acc)
        except FileNotFoundError as e:
            # Terminate the message with a newline so that consecutive
            # failures do not run together on one line
            sys.stderr.write("Adding geolite data failed: %s\n" % str(e))

def find_preprocessed_ip_address(ipaddr, preprocessed_dir, geolite_acc = None):
    """Load all stored onion routers of an IP address.

    ipaddr -- the IP address to look up.
    preprocessed_dir -- directory with the preprocessed files.
    geolite_acc -- optional accessor of geolite data; when given, the
                   routers are extended with geolite data.

    Returns a list of onion routers; it is empty when a FileNotFoundError
    is raised during the lookup.
    """
    try:
        ip_file = create_ip_filename(preprocessed_dir, ipaddr)
        routers = parse_ors_from_file(ip_file)
    except FileNotFoundError:
        return []
    if geolite_acc:
        add_geolite_data_to_onion_routers(ipaddr, routers, geolite_acc)
    return routers

def find_preprocessed_ip_address_time_filter(ipaddr, preprocessed_dir, time_search, date_prefix,
        geolite_acc):
    """Load the stored onion routers of an IP address and filter them by time.

    ipaddr -- the IP address to look up.
    preprocessed_dir -- directory with the preprocessed files.
    time_search -- when truthy, keep only routers whose validity interval
                   (valid-after to valid-until) contains this time.
    date_prefix -- used when time_search is falsy: keep only routers whose
                   valid-after, valid-until or fresh-until date starts with
                   this prefix.
    geolite_acc -- accessor of geolite data (or a falsy value) applied to
                   the routers that passed the filter.

    Returns the list of matching routers; all routers are returned when
    neither time_search nor date_prefix is given.
    """
    def valid_at_searched_time(orouter):
        begin = TimeWrapper(" ".join(orouter.get_inconsensus_val_after()))
        end = TimeWrapper(" ".join(orouter.get_inconsensus_val_until()))
        return begin.get() <= time_search.get() <= end.get()

    def matches_date_prefix(orouter):
        return (orouter.get_inconsensus_val_after()[0].startswith(date_prefix)
                or orouter.get_inconsensus_val_until()[0].startswith(date_prefix)
                or orouter.get_inconsensus_fresh_until()[0].startswith(date_prefix))

    def accept_all(orouter):
        return True

    if time_search:
        keep = valid_at_searched_time
    elif date_prefix:
        keep = matches_date_prefix
    else:
        keep = accept_all
    candidates = find_preprocessed_ip_address(ipaddr, preprocessed_dir)
    selected = [orouter for orouter in candidates if keep(orouter)]
    # Geolite data are added only to the routers that are going to be returned
    add_geolite_data_to_onion_routers(ipaddr, selected, geolite_acc)
    return selected

def output_preprocessed_ip_addresss_time_filter(ipaddr, preprocessed_dir, time_search, date_prefix,
        geolite_acc, outfile):
    """Search for an IP address and write the result to outfile.

    When nothing is found, a plain text message is written if a time or a
    date prefix was requested (nothing is written otherwise). When routers
    are found, neighbouring entries are merged and the result is written as
    a list in square brackets with the items separated by commas.

    ipaddr -- the IP address to look up.
    preprocessed_dir -- directory with the preprocessed files.
    time_search -- time of the search or a falsy value.
    date_prefix -- date prefix of the search or a falsy value.
    geolite_acc -- accessor of geolite data or a falsy value.
    outfile -- writable text stream receiving the output.
    """
    found = find_preprocessed_ip_address_time_filter(ipaddr, preprocessed_dir, time_search, date_prefix,
            geolite_acc)
    if not found:
        if time_search:
            outfile.write("IP address %s was not active in Tor at %s\n" % (ipaddr, time_search))
        elif date_prefix:
            outfile.write("IP address %s was not active in Tor during date prefix %s\n" % (ipaddr,
                date_prefix))
        return
    outfile.write("[")
    for position, r in enumerate(merge_subsequent_ors(found)):
        # The comma goes between the items only; a comma after the last item
        # (as in "[a,b,]") is not valid JSON.
        # NOTE(review): assumes output_json writes one complete JSON value -- confirm
        if position:
            outfile.write(",")
        r.output_json(outfile)
    outfile.write("]")

def merge_subsequent_ors(ors):
    """ Collapses runs of neighbouring OR entries that can be merged.

    Each entry is offered to the entry kept most recently via its merge()
    method; when merge() reports failure, the offered entry is kept as a new
    item. Only direct neighbours are considered, and whether two entries
    match is decided solely by merge().

    Returns a new list when ors has at least two items, otherwise ors itself.
    """
    if len(ors) <= 1:
        # Nothing to merge
        return ors
    kept = [ors[0]]
    for candidate in ors[1:]:
        if not kept[-1].merge(candidate):
            kept.append(candidate)
    return kept

def get_ip_address_activity(ipaddr, preprocessed_dir, time_format = "%Y-%m-%d"):
    """List the time periods in which an IP address was stored as active.

    The valid-after and valid-until dates of each stored router are
    converted to time_format; a converted value is reported only when it is
    later than every value reported before it.

    ipaddr -- the IP address to look up.
    preprocessed_dir -- directory with the preprocessed files.
    time_format -- strftime-like format of the returned values.

    Returns the list of formatted dates in increasing order.
    """
    TOR_DATE_FORMAT = "%Y-%m-%d"
    needs_conversion = time_format != TOR_DATE_FORMAT
    activity = []
    newest = 0
    for r in find_preprocessed_ip_address(ipaddr, preprocessed_dir):
        boundaries = (r.get_inconsensus_val_after()[0], r.get_inconsensus_val_until()[0])
        for day in boundaries:
            if needs_conversion:
                label = FormatTimeWrapper(day, TOR_DATE_FORMAT).format(time_format)
            else:
                label = day
            stamp = FormatTimeWrapper(label, time_format).get()
            if stamp > newest:
                activity.append(label)
                newest = stamp
    return activity

# Argument handling
def process_args():
    """Build the command line parser and parse the program arguments.

    The options are split into a group controlling the preprocessing of
    consensus files and a group controlling the search for an IP address.

    Returns the namespace produced by argparse.
    """
    cli = argparse.ArgumentParser(description="TARZAN consensus descriptors parser")

    # Options of the preprocessing mode
    preprocessing = cli.add_argument_group(
            "Preprocess consensus files",
            "Download the latest consensus files and preprocess the IP addresses listed in the files")
    preprocessing.add_argument("--consensus_path", "-c",
            help="The input file or directory with consensuses.")
    # Only one way of processing may be selected at a time
    mode = preprocessing.add_mutually_exclusive_group()
    mode.add_argument("--update", "-u", action="store_true",
            help="Fetch and parse new consensuses.")
    mode.add_argument("--update_keeprunning", "-U", action="store_true",
            help="Keep running and fetch and update new consensuses")
    mode.add_argument("--fixup", "-f", type=TimeWrapper,
            help="Insert missing consensus data of given time (UTC) -- sometimes data "
                "appears in the archive with a delay and future consensuses are already "
                "available and preprocessed, fix such situations")
    preprocessing.add_argument("--write_preprocessed", "-w",
            help="The path where preprocessed consensus will be writen.")
    preprocessing.add_argument("--additional_info", "-a", action="store_true",
            help="For each IP address gather additional info like reverse DNS.")
    preprocessing.add_argument("--email", "-m",
            help="E-mail address to be notified when a consensus is not available on time")

    # Options of the search mode
    search = cli.add_argument_group(
            "Search for IP address",
            "Search for given IP address in the preprocessed files.")
    search.add_argument("--ipaddress", "-i", help="The IP address to be searched for.")
    search.add_argument("--preprocessed_input", "-p",
            help="The preprocessed directory with consensuses sorted by IP address.")
    search.add_argument("--geolite_dir", "-g",
            help="The directory with geolite CSVs. The CSVs are expected to be in subdirectories\n"
                "            contained in the downloadable zip files")
    # Either an exact time or a date prefix may limit the search
    time_filter = search.add_mutually_exclusive_group()
    time_filter.add_argument("--time", "-t", type=TimeWrapper,
            help="Time of the search for the IP address (UTC)")
    time_filter.add_argument("--date_prefix", "-d",
            help="Specify the date prefix for the search -- prefix of YYYY-MM-DD")

    return cli.parse_args()

def main(args, stdout):
    """Run the actions selected by the parsed command line arguments.

    Preprocessing (args.consensus_path given): requires
    args.write_preprocessed, otherwise the program exits with status 100.
    Depending on the options, consensuses are updated once (--update),
    updated in an endless loop (--update_keeprunning), a missing consensus is
    inserted (--fixup), or the given path is preprocessed.

    Search (args.ipaddress given): the result is written to stdout when
    args.preprocessed_input is given too.

    args -- namespace returned by process_args().
    stdout -- writable text stream for progress messages and search results.

    Raises NotImplementedError when an IP address is to be searched without
    the preprocessed input directory.
    """
    if args.consensus_path:
        if args.additional_info:
            onion_router.request_additional = True
        if not args.write_preprocessed:
            sys.stderr.write("Please specify --write_preprocessed argument.\n")
            sys.exit(100)
        if args.update or args.update_keeprunning:
            emailnotice_counter = 0
            while True: # Left by the break below after the first pass when only --update is given
                next_t = update_path_consensus(os.path.abspath(args.consensus_path),
                    os.path.abspath(args.write_preprocessed))
                if args.update_keeprunning:
                    # Wake up WAIT_FOR_CONSENSUS seconds after the next expected consensus
                    sleep_time = next_t + WAIT_FOR_CONSENSUS - time.time()
                    stdout.write("I'm going to sleep [s] %d\n" % sleep_time)
                    try:
                        time.sleep(sleep_time)
                    except ValueError:
                        # time.sleep() rejects a negative sleep_time, i.e. the expected
                        # consensus is overdue
                        if args.email:
                            emailnotice_counter += 1
                            # Send the notice only on every 60th overdue wake-up
                            if emailnotice_counter % 60 == 0:
                                p = subprocess.Popen(["sendmail", args.email],
                                        stdout = subprocess.PIPE, stdin = subprocess.PIPE)
                                p.stdin.write(b'Consensus missing, trying to continue')
                                p.communicate()
                                p.stdin.close()
                        time.sleep(60) # Probably, there should be a consensus ready, sleep for a minute anyway
                    stdout.write("I woke up at %s\n" % str(TimeWrapper(time.time())))
                else: # args.update without args.update_keeprunning
                    break
        elif args.fixup:
            fixup_missing_consensus(os.path.abspath(args.consensus_path),
                    os.path.abspath(args.write_preprocessed), args.fixup.get())
        else:
            process_path_consensus(os.path.abspath(args.consensus_path),
                os.path.abspath(args.write_preprocessed))

    # The search is independent of the preprocessing above and may follow it in the same run
    if args.preprocessed_input and args.ipaddress:
        output_preprocessed_ip_addresss_time_filter(args.ipaddress, args.preprocessed_input, args.time,
                args.date_prefix, geolite2_accessor(args.geolite_dir), stdout)
    elif args.ipaddress:
        raise NotImplementedError("Search for IP address supported only in preprocessed files.")

if __name__ == "__main__":
    # Script entry point: parse the command line and report to the standard output
    main(process_args(), sys.stdout)
