#!/usr/bin/python3
# -*- python -*-
# PiGx RNAseq Pipeline.
#
# Copyright © 2017 Bora Uyar <bora.uyar@mdc-berlin.de>
# Copyright © 2017 Jonathan Ronen <yablee@gmail.com>
# Copyright © 2017, 2018 Ricardo Wurmus <ricardo.wurmus@mdc-berlin.de>
#
# This file is part of the PiGx RNAseq Pipeline.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

# NOTE(review): the constant condition and the empty string being split
# below look like values substituted at build time -- confirm against
# the template this file is generated from.
if 'yes' == 'yes':
    import os, sys
    # Blank out these variables in this process's environment.
    os.environ["R_LIBS_SITE"] = ""
    os.environ["PYTHONPATH"] = ""
    # Setting PYTHONPATH does not help with dependencies needed for
    # this script.
    for directory in "".split(":"):
        # Skip empty entries: splitting an empty string yields [''], and
        # an empty string on sys.path makes Python search the current
        # working directory, ahead of the installed packages.
        if directory and directory not in sys.path:
            sys.path.insert(1, directory)


import argparse
from os import path
import os, sys, json, csv, yaml
from snakemake.utils import update_config
import shutil, filecmp
from glob import glob
import subprocess

# Text passed as the argument parser's description.
description = """\
PiGx RNAseq Pipeline.

PiGx RNAseq is a data processing pipeline for RNAseq read data.
"""

# Text passed as the argument parser's epilog.
epilog = 'This pipeline was developed by the Akalin group at MDC in Berlin in 2017-2018.'

# Text printed by the -v/--version option.
version = """\
PiGx RNAseq Pipeline.
Version: 0.0.9

Copyright © 2017, 2018 Bora Uyar, Jona Ronen, Ricardo Wurmus.
License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>.

This is free software: you are free to change and redistribute it.
There is NO WARRANTY, to the extent permitted by law.
"""

def formatter(prog):
    """Return the help formatter for program name PROG.

    The result is an argparse.RawTextHelpFormatter constructed with
    max_help_position set to 80.
    """
    help_position = 80
    return argparse.RawTextHelpFormatter(prog=prog,
                                         max_help_position=help_position)

# Command line interface.  The help texts are laid out by hand, so the
# parser uses the raw-text formatter returned by formatter().
parser = argparse.ArgumentParser(description=description,
                                 epilog=epilog,
                                 formatter_class=formatter)

parser.add_argument('-v', '--version', action='version',
                    version=version)

# Optional positional argument; defaults to 'sample_sheet.csv'.
parser.add_argument('sample_sheet', nargs='?', default='sample_sheet.csv',
                    help="""\
The sample sheet containing sample data in CSV format.\
""")

# Exactly one of --init and --settings has to be given.  A bare --init
# (without a value) means 'both'.
group = parser.add_mutually_exclusive_group(required=True)
group.add_argument('--init', dest='init', choices=['settings', 'sample-sheet', 'both'], const='both', nargs='?',
                   help='Generate a template SETTINGS file, a SAMPLE-SHEET, or both.  Leave empty for both.')
group.add_argument('-s', '--settings', dest='settings',
                   help='A YAML file for settings that deviate from the defaults.')

parser.add_argument('-c', '--configfile', dest='configfile', default='./config.json',
                    help="""\
The config file used for calling the underlying snakemake process.  By
default the file 'config.json' is dynamically created from the sample
sheet and the settings file.
""")

# May be given several times; the values are collected in a list.
parser.add_argument('--target', dest='target', action='append',
                    help="""\
Stop when the named target is completed instead of running the whole
pipeline.  The default target is "final-report".  Pass "--target=help"
to describe all available targets.""")

parser.add_argument('-n', '--dry-run', dest='dry_run', action='store_true',
                    help="""\
Only show what work would be performed.  Do not actually run the
pipeline.""")

# The rule graph is piped through "dot -Tpdf", so the file that gets
# written is a PDF document.
parser.add_argument('--graph', dest='graph',
                    help="""\
Output a graph in PDF format (rendered with Graphviz dot) showing the
relations between rules of this pipeline.  You must specify a graph
file name such as "graph.pdf".""")

parser.add_argument('--force', dest='force', action='store_true',
                    help="""\
Force the execution of rules, even though the outputs are considered
fresh.""")

parser.add_argument('--reason', dest='reason', action='store_true',
                    help="""\
Print the reason why a rule is executed.""")

parser.add_argument('--unlock', dest='unlock', action='store_true',
                    help="""\
Recover after a snakemake crash.""")

args = parser.parse_args()


# Generate config file
def bail(msg):
    """Print the error message MSG to stderr and exit with status 1."""
    print(msg, file=sys.stderr)
    # Use sys.exit rather than the bare exit(): the latter is only
    # installed by the site module and is absent under "python -S".
    sys.exit(1)


def generate_config(configfile, sample_sheet, settingsfile, dirs):
    """Generate a new configuration file CONFIGFILE using SAMPLESHEET and
SETTINGSFILE as inputs.  Use the locations in DIRS to find default
settings.

The default settings are read from 'etc/settings.yaml' below $srcdir
(or the current directory) when PIGX_UNINSTALLED is set, and from the
'pkgdatadir' location in DIRS otherwise.  Values from SETTINGSFILE, if
given, override the defaults.  The execution target is taken from the
module-level command line arguments.  The result is written to
CONFIGFILE as JSON.  The program exits via bail() when the default
settings cannot be found.
    """
    # Load defaults
    if os.getenv('PIGX_UNINSTALLED'):
        where = os.getenv('srcdir') if os.getenv('srcdir') else '.'
        defaults = path.join(where, 'etc/settings.yaml')
    else:
        defaults = path.join(dirs['locations']['pkgdatadir'], 'settings.yaml')

    if not path.exists(defaults):
        bail("Could not find default settings.  Did you forget to set PIGX_UNINSTALLED?")

    with open(defaults, 'r') as infile:
        settings = yaml.safe_load(infile)

    # Load user overrides
    if settingsfile:
        with open(settingsfile, 'r') as infile:
            overrides = yaml.safe_load(infile)
        # A settings file that is empty or holds nothing but comments
        # (such as an unedited template written by --init) loads as
        # None; there is nothing to merge in that case.
        if overrides:
            update_config(settings, overrides)

    settings['execution']['target'] = args.target
    settings['locations'].update(dirs['locations'])

    # Resolve relative paths in the locations section.  They are joined
    # onto $srcdir (or the current directory) and the directory of the
    # sample sheet; path.join leaves absolute values as they are.
    root = path.dirname(sample_sheet)
    here = os.getenv('srcdir') if os.getenv('srcdir') else os.getcwd()

    for key in settings['locations']:
        settings['locations'][key] = path.normpath(path.join(here, root, settings['locations'][key]))

    # Record the location of the sample sheet.
    settings['locations']['sample-sheet'] = path.abspath(sample_sheet)

    # Write the config file
    with open(configfile, 'w') as outfile:
        dumps = json.dumps(settings,
                           indent=4, sort_keys=True,
                           separators=(",",": "), ensure_ascii=True)
        outfile.write(dumps)

def generate_cluster_configuration():
    """Write 'cluster_conf.json' and return that file name.

    The file holds one entry per rule listed under the 'execution'
    section of the module-level config.  Each entry combines the rule's
    'threads' and 'memory' values with the 'queue' and 'stack' values of
    the cluster settings.
    """
    execution = config['execution']
    rules = execution['rules']

    per_rule = {
        name: {
            'nthreads': rules[name]['threads'],
            'q': execution['cluster']['queue'],
            'MEM': rules[name]['memory'],
            'h_stack': execution['cluster']['stack']
        }
        for name in rules
    }

    cluster_config_file = "cluster_conf.json"
    with open(cluster_config_file, 'w') as handle:
        handle.write(json.dumps(per_rule,
                                indent=4, sort_keys=True,
                                separators=(",",": "), ensure_ascii=True))
    return cluster_config_file


# Create symbolic links to the inputs and reference genome

# Create links within the output folder that point directly to the
# reference genome, as well as to each sample input file so that it's
# clear where the source data came from.

# N.B. Any previously existing links will be kept in place, and no
# warning will be issued if this is the case.

def makelink(src, target):
    """Create a symbolic link TARGET that points to the file SRC.

    The program exits via bail() when SRC is not an existing file or
    when the directory that should contain TARGET does not exist.  A
    TARGET that already exists is left untouched.
    """
    if not path.isfile(src):
        bail("Refusing to link non-existent file %s" % src)
    elif not path.isdir(path.dirname(target)):
        # Both values have to be passed to the % operator as one tuple;
        # otherwise the format string is short of one argument.
        bail("%s or subdirectory does not exist for linking %s"
             % (config['locations']['output-dir'], target))
    else:
        try:
            os.symlink(src, target)
        except FileExistsError:
            # Previously existing links are kept in place.
            pass

def maybe_copy(source, target_dir):
    """Copy SOURCE into TARGET_DIR unless an equal copy is already there.

    The existing copy is compared with filecmp.cmp; the file is copied
    when it is missing or when the comparison reports a difference.
    """
    destination = path.join(target_dir, path.basename(source))
    up_to_date = path.exists(destination) and filecmp.cmp(source, destination)
    if up_to_date:
        return
    shutil.copy(source, target_dir)

def prepare_links():
    """Set up 'pigx_work/bin' below the output directory.

    The directory is recreated from scratch and filled with symbolic
    links to the configured pandoc, pandoc-citeproc and Rscript
    executables.  It is then put at the front of PATH, and R_LIBS_USER
    is set to "/dev/null".
    """
    output_dir = config['locations']['output-dir']
    os.makedirs(path.join(output_dir, 'pigx_work'), exist_ok=True)

    # Start with an empty directory so that stale links do not survive.
    tool_dir = path.join(output_dir, 'pigx_work/bin')
    if path.exists(tool_dir):
        shutil.rmtree(tool_dir)
    os.makedirs(tool_dir, exist_ok=True)

    # Ensure that we use the configured Pandoc, pandoc-citeproc
    # ...and the configured Rscript
    tools = (
        ('/usr/bin/pandoc', "pandoc"),
        ('/usr/bin/pandoc-citeproc', "pandoc-citeproc"),
        ('/usr/bin/Rscript', "Rscript"),
    )
    for executable, link_name in tools:
        os.symlink(executable, path.join(tool_dir, link_name))

    os.environ['PATH'] = ":".join((path.abspath(tool_dir), os.environ['PATH']))
    os.environ['R_LIBS_USER'] = "/dev/null"

def display_logo():
    """Write the contents of 'pretty.txt' to standard output.

    Nothing is printed when the environment variable PIGX_UGLY is set.
    The file is looked up in 'etc' below $srcdir (or the current
    directory) when PIGX_UNINSTALLED is set, and in the configured
    'pkgdatadir' location otherwise.
    """
    if os.getenv('PIGX_UGLY'):
        return

    if os.getenv('PIGX_UNINSTALLED'):
        where = os.getenv('srcdir') if os.getenv('srcdir') else '.'
        pretty = path.join(where, 'etc/pretty.txt')
    else:
        pretty = path.join(config['locations']['pkgdatadir'], 'pretty.txt')

    with open(pretty, 'r', encoding='utf-8') as logo:
        text = logo.read()

    # Ensure that this is printed without errors, even if the user's
    # locale is not a UTF-8 locale.
    sys.stdout.buffer.write(text.encode('utf-8'))



# Determine locations
dirs = {}
if os.getenv('PIGX_UNINSTALLED'):
    # Every location is $srcdir, or the current directory when that
    # variable is unset or empty.
    here = os.getenv('srcdir') or os.getcwd()
    dirs['locations'] = dict.fromkeys(
        ('prefix', 'exec_prefix', 'libexecdir',
         'pkglibexecdir', 'datarootdir', 'pkgdatadir'),
        here)
else:
    def _expand(template, **values):
        """Fill in a '${name}/...' TEMPLATE after dropping the '$'."""
        return template[1:].format(**values)

    # Expand and store autoconf directory variables
    prefix = '/usr'
    exec_prefix = _expand('${prefix}', prefix=prefix)
    libexecdir = _expand('${exec_prefix}/libexec', exec_prefix=exec_prefix)
    pkglibexecdir = '{libexecdir}/pigx_rnaseq'.format(libexecdir=libexecdir)
    datarootdir = _expand('${prefix}/share', prefix=prefix)
    pkgdatadir = _expand('${datarootdir}/pigx_rnaseq', datarootdir=datarootdir)

    dirs['locations'] = dict(
        prefix='/usr',
        exec_prefix=exec_prefix,
        libexecdir=libexecdir,
        pkglibexecdir=pkglibexecdir,
        datarootdir=datarootdir,
        pkgdatadir=pkgdatadir,
    )

# Init?
if args.init:
    # Find the directory holding the templates.  In uninstalled mode
    # honour $srcdir, like the lookup of the default settings does, and
    # fall back to the current directory.
    if os.getenv('PIGX_UNINSTALLED'):
        where = os.getenv('srcdir') if os.getenv('srcdir') else os.getcwd()
        base = path.join(where, 'etc')
    else:
        base = dirs['locations']['pkgdatadir']

    if args.init in ('settings', 'both'):
        name = 'settings.yaml'
        if path.exists(name):
            print('Refusing to overwrite existing {}.'.format(name))
        else:
            # Read the template before creating the output file, so that
            # a missing template does not leave an empty file behind.
            with open(path.join(base, name), 'r') as infile:
                template = infile.readlines()
            # Every line of the default settings is commented out.
            with open(name, 'w') as outfile:
                outfile.writelines('# ' + line for line in template)
            print('Generated {} template.'.format(name))

    if args.init in ('sample-sheet', 'both'):
        name = 'sample_sheet.csv'
        if path.exists(name):
            print('Refusing to overwrite existing {}.'.format(name))
        else:
            shutil.copy(path.join(base, name + '.example'), name)
            print('Generated {} template.'.format(name))
    sys.exit(0)

# Run snakemake!
generate_config(args.configfile,
                args.sample_sheet,
                args.settings,
                dirs)

# Read back the configuration file that was just written.
with open(args.configfile, 'r') as infile:
    config = json.load(infile)

command = [
    "/usr/bin/snakemake",
    "--printshellcmds",
    "--snakefile={}/pigx_rnaseq.py".format(config['locations']['pkglibexecdir']),
    "--configfile={}".format(args.configfile),
    "--directory={}".format(config['locations']['output-dir']),
    "--jobs={}".format(config['execution']['jobs']),
]

if config['execution']['submit-to-cluster']:
    cluster_config_file = generate_cluster_configuration()
    print("Commencing snakemake run submission to cluster", flush=True, file=sys.stderr)
    # The string 'none' (in any capitalisation) disables mail options.
    if config['execution']['cluster']['contact-email'].lower() == 'none':
        contact_email_string = ""
    else:
        contact_email_string = "-m a -M %s" % config['execution']['cluster']['contact-email']

    # Probe for qsub.  A missing executable is reported as
    # FileNotFoundError; any other OSError propagates unchanged.
    # (os.errno, which used to be consulted here, no longer exists in
    # Python 3.7 and later.)
    try:
        subprocess.call(["qsub", "-help"], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
    except FileNotFoundError:
        print("Error: Your system does not seem to support cluster submission.\nReason: Could not find qsub executable.", file=sys.stderr)
        sys.exit(1)
    # The {cluster.*} placeholders are left in the string as they are;
    # only the mail options are filled in here.
    qsub = "qsub -v R_LIBS_USER -v PATH -v GUIX_LOCPATH -l h_stack={cluster.h_stack}  -l h_vmem={cluster.MEM} %s -b y -pe smp {cluster.nthreads} -cwd" % contact_email_string
    if config['execution']['cluster']['args']:
        qsub += " " + config['execution']['cluster']['args']
    command += [
        "--cluster-config={}".format(cluster_config_file),
        "--cluster={}".format(qsub),
        "--jobscript={}/qsub-template.sh".format(config['locations']['pkglibexecdir']),
        "--latency-wait={}".format(config['execution']['cluster']['missing-file-timeout'])
    ]
else:
    print("Commencing snakemake run submission locally", flush=True, file=sys.stderr)

command.append("--rerun-incomplete")
if args.graph:
    # Pipe the output of "snakemake --dag" through dot to get a PDF.
    command.append("--dag")
    with open(args.graph, "w") as outfile:
        p1 = subprocess.Popen(command, stdout=subprocess.PIPE)
        p2 = subprocess.Popen(['dot','-Tpdf'], stdin=p1.stdout, stdout=outfile)
        # Drop our copy of the pipe so that snakemake gets SIGPIPE
        # should dot exit early.
        p1.stdout.close()
        p2.communicate()
        p1.wait()
    # Report a failure of either process to the caller.
    sys.exit(p1.returncode or p2.returncode)
else:
    prepare_links()
    display_logo()
    if args.force:
        command.append("--forceall")
    if args.dry_run:
        command.append("--dryrun")
    if args.reason:
        command.append("--reason")
    if args.unlock:
        command.append("--unlock")
    if args.target and 'help' in args.target:
        command.append("help")
    # Hand the exit status of snakemake on to the caller instead of
    # always finishing with status 0.
    sys.exit(subprocess.run(command).returncode)
