# Reference at http://timgolden.me.uk/python/win32_how_do_i/watch_directory_for_changes.html

import getopt
import logging
import os
import shutil
import subprocess
import sys
import time
from multiprocessing import Process, active_children

# Dependency bootstrap: any third-party library that cannot be imported is installed with pip,
# after which the script re-executes itself so that the fresh install is picked up.
restart = False  # Set once a missing library has been installed successfully.
_install_failed = False  # Set when an install attempt fails; restarting then would only loop forever.
# pip invocation shared by every install; the package name is appended per call.
_PIP_INSTALL = [sys.executable, '-m', 'pip', 'install', '--extra-index-url', 'http://172.16.15.21:8080/simple/', '--trusted-host', '172.16.15.21', '--timeout=30']

try:
    import win32con
    import win32file
except ImportError:
    print('Import of pywin32 failed. Attempting install ...')
    try:
        subprocess.check_call(_PIP_INSTALL + ['pywin32'])
        print('Installed successfully.')
        restart = True
    except (subprocess.CalledProcessError, OSError) as e:
        print(e)
        _install_failed = True
try:
    from file_read_backwards import FileReadBackwards
except ImportError:
    print('Import of file_read_backwards failed. Attempting install ...')
    try:
        subprocess.check_call(_PIP_INSTALL + ['file-read-backwards'])
        print('Installed successfully.')
        restart = True
    except (subprocess.CalledProcessError, OSError) as e:
        print(e)
        _install_failed = True

if _install_failed:
    # Re-executing without the library would fail the same import again and never terminate.
    sys.exit('Required libraries are missing and could not be installed. Exiting.')
if restart:  # Restart the application with previous parameters if we've downloaded any missing libraries.
    os.execv(sys.executable, [sys.executable] + sys.argv)


def copy(src, tar):
    """Copy ``src`` to ``tar`` (including file metadata) without raising on IO errors.

    Args:
        src: Path of the file to copy.
        tar: Destination path.

    An IO error is logged as a warning together with its cause and is not propagated to the
    caller.
    """
    try:
        shutil.copy2(src, tar)
    except IOError as e:
        # Include the underlying error so the log shows why the copy failed, not just that it did.
        logging.warning('Copy of {} failed due to IO error: {}'.format(src, e))


def _backup_file(name, src, tar, append_extensions):
    """Bring the backup ``tar`` up to date with ``src``, appending only the missing tail when possible.

    Args:
        name: File name relative to the watched folder; used in log messages only.
        src: Path of the file being backed up.
        tar: Path of its backup copy.
        append_extensions: Extensions (without the dot) for which an existing backup is extended
            with the missing lines instead of being copied whole.
    """
    import io  # Local import: only needed for the explicitly encoded append below.

    tar_dir = os.path.dirname(tar)
    try:  # If for whatever reason the path to file doesn't exist, create it.
        os.makedirs(tar_dir)
    except OSError as e:
        if not os.path.isdir(tar_dir):  # An already existing folder is the normal case.
            logging.warning('Could not create folder {}: {}'.format(tar_dir, e))

    if not (os.path.exists(tar) and tar.rsplit('.', 1)[-1] in append_extensions):
        logging.debug('File {} does not exist or is not an append-type file. Copying.'.format(tar))
        copy(src, tar)
        return

    logging.debug('File {} exists. Appending.'.format(tar))
    ref_size = os.path.getsize(src)  # Bytes of the source not yet searched (counted down while reading backwards).
    tar_size = os.path.getsize(tar)  # The size of the file on backup.
    if ref_size < tar_size:
        # NOTE(review): the previous message announced a copy that was never performed. The
        # behaviour (leave the larger backup untouched) is kept; confirm that this is intended.
        logging.warning('Backed up file is larger than origin for {}. Skipping.'.format(src))
        return

    try:
        with FileReadBackwards(tar, encoding='utf-8') as frb:
            last_backed_line = frb.readline()  # Last line that made it to the backup.
    except Exception:  # If we can't read the file, it's probably binary (dists), so just copy it whole for now.
        logging.warning('Failed to back-read {}. Copying ...'.format(tar))
        copy(src, tar)
        return

    wanted = last_backed_line.strip()  # Lines are compared stripped, so line endings don't matter.
    lines_to_write = []
    match = False
    with FileReadBackwards(src, encoding='utf-8') as frb:
        for line in frb:
            if line.strip() == wanted:
                match = True
                break
            lines_to_write.append('\n' + line)
            if ref_size < tar_size:
                # We have read back past the point where the backup ends; stop searching.
                logging.warning('Could not find match for {} within reasonable range. Skipping.'.format(src))
                logging.debug('Line:\n{}Matches in range:{}'.format(last_backed_line, ''.join(lines_to_write[-10:])))
                break
            ref_size -= len(('\n' + line).encode('utf-8'))

    if match:  # Only write if match is there
        # The lines were decoded as UTF-8, so they are written back as UTF-8 rather than in the
        # platform's default encoding.
        with io.open(tar, 'a', encoding='utf-8') as fh:
            fh.writelines(reversed(lines_to_write))  # Collected newest-first, written oldest-first.
        shutil.copystat(src, tar)  # Copy file metadata so that update time matches.
    else:
        logging.info('Could not append to {}; unable to match last line! Copying whole file.'.format(name))
        copy(src, tar)


def copy_big(files, site_folder, backup_folder):
    """Back up ``files`` from ``site_folder`` into ``backup_folder``.

    Files whose extension is listed in ``APPEND_SAVE`` and that already exist in the backup are
    extended with the lines missing from the backup; every other file is copied whole. An error
    on one file is logged and does not stop the remaining files from being processed.

    Args:
        files: Iterable of file names relative to ``site_folder``.
        site_folder: Folder the files are read from.
        backup_folder: Folder the backups are written to (sub-folders are created as needed).
    """
    APPEND_SAVE = ('list',)
    logging.debug('Started backing up {} files.'.format(len(files)))
    logging.debug('Files:\n{}'.format('\n'.join(files)))
    total_time = time.time()
    for f in files:
        try:
            logging.debug('Processing file {} ...'.format(f))
            # Show how long copying/appending each file took.
            start_time = time.time()
            src = os.path.join(site_folder, f)
            tar = os.path.join(backup_folder, f)
            _backup_file(f, src, tar, APPEND_SAVE)
            logging.debug('{} seconds to handle file {}'.format(round(time.time() - start_time, 2), src))
        except Exception as e:
            # Report ``f`` rather than ``src``: ``src`` may not have been assigned for this file yet.
            logging.exception('A fatal error has occurred copying file {}. File skipped: {}'.format(f, e))
    logging.debug('Copying of {} files completed successfully in {} seconds.'.format(len(files), round(time.time() - total_time, 2)))


if __name__ == '__main__':
    # Script entry point: parse the command line, open the watched folder and then loop forever,
    # copying "realtime" files as soon as they change and handing every other changed file to a
    # periodic background copy_big() process.
    VERSION = '1.7.2'
    OUTPUT_PATH = 'D:\\siwim_mkiii\\backup_script'
    BACKUP_PATH = 'E:\\backup'
    PATH_TO_WATCH = 'D:\\siwim_mkiii\\sites'
    INTERVAL = 600  # Saving interval in seconds
    REALTIME_SAVE = ('swu', 'event', 'jpg', 'vehiclephotos')
    LOG_LEVEL = logging.INFO
    FILE_LIST_DIRECTORY = 0x0001  # Win32 access right used to open the directory for watching.
    FILE_ACTION_MODIFIED = 3  # Win32 action code reported for a modified file.

    # Warnings raised before logging is configured are collected here. Logging them right away
    # would implicitly configure the root logger and make the basicConfig() call below a no-op.
    startup_warnings = []

    try:
        opts, args = getopt.getopt(sys.argv[1:], 'hd:i:o:t:', ['help', 'debug', 'drive=', 'interval=', 'output=', 'target='])
    except getopt.GetoptError:
        startup_warnings.append('Parsing of parameters failed on obtaining stage. Using defaults')
        opts = list()

    try:
        for opt, arg in opts:
            if opt in ('-h', '--help'):
                # Argument order must follow the placeholders: version, drive, interval, target.
                print('\nBackup version {} options:\n'
                      '--debug: Enable debug output/logging. Should only be used when debugging as this mode logs/displays every file copy as well as some other information.\n'
                      '-d --drive: Define backup drive letter/path (default {}).\n'
                      '-h --help: Write help and exit.\n'
                      '-i --interval: Saving interval in seconds (default {})\n'
                      '-o --output: Absolute path to folder where output logs are to be stored.\n'
                      '-t --target: Absolute path to folder that we\'re backing up (default {})'.format(VERSION, BACKUP_PATH, INTERVAL, PATH_TO_WATCH))
                sys.exit()
            elif opt == '--debug':
                LOG_LEVEL = logging.DEBUG
            elif opt in ('-d', '--drive'):
                BACKUP_PATH = arg
            elif opt in ('-i', '--interval'):
                INTERVAL = int(arg)
            elif opt in ('-o', '--output'):
                OUTPUT_PATH = arg
            elif opt in ('-t', '--target'):
                PATH_TO_WATCH = arg
    except ValueError:  # Only int(arg) can fail here; sys.exit() from --help passes through untouched.
        startup_warnings.append('Parsing of parameters failed on parsing stage. Using defaults.')

    logging.basicConfig(format='%(asctime)s %(levelname)-8s %(message)s', datefmt='%Y-%m-%d %H:%M:%S', level=LOG_LEVEL)
    for message in startup_warnings:
        logging.warning(message)
    logging.info('Arguments: {}.'.format(sys.argv))

    if not os.path.exists(OUTPUT_PATH):
        os.makedirs(OUTPUT_PATH)  # makedirs, so that a nested output path can be created as well.

    logging.info('Backup script v{} started. Backing up from {} to {}. Periodic backup called every {} seconds. Logs written to {}'.format(VERSION, PATH_TO_WATCH, BACKUP_PATH, INTERVAL, OUTPUT_PATH))

    try:
        hDir = win32file.CreateFile(
            PATH_TO_WATCH,
            FILE_LIST_DIRECTORY,
            win32con.FILE_SHARE_READ,
            None,
            win32con.OPEN_EXISTING,
            win32con.FILE_FLAG_BACKUP_SEMANTICS,
            None
        )
    except Exception:
        logging.exception('Path to watch probably does not exist:')
        sys.exit(1)  # Non-zero exit status, since the script could not start.

    start = time.time()
    big_files = set()  # Files that we update every X minutes
    results = list()
    backup_results = list()
    failed = False  # Indicates the previous saving has failed

    while True:
        try:
            logging.debug('Running files changed query ...')
            backup_results = set(entry[1] for entry in results)  # Store the file names so far so we have them to report something if buffer overflows. Probably overkill.
            results = set(win32file.ReadDirectoryChangesW(  # Cast to set, so that duplicates of the same event for the same file are skipped
                hDir,
                32768,  # This buffer should be able to hold all changed files.
                True,
                win32con.FILE_NOTIFY_CHANGE_SIZE,
            ))

            if failed:
                # Report the batch that is about to be handled; backup_results is always empty here.
                logging.info('Previous buffer lost. Now handling {} files.'.format(len(results)))
                failed = False

            if len(results) == 0:
                logging.error('Buffer overflow; some of the lost file names logged to backup_overflow.log; some files (up to {} seconds) have been lost.'.format(INTERVAL))
                with open(os.path.join(OUTPUT_PATH, 'backup_overflow.log'), 'a') as fh:
                    fh.write('{}\n'.format('\n'.join(backup_results)))
                failed = True

            for action, name in results:
                if action == FILE_ACTION_MODIFIED and name.rsplit('.', 1)[-1] in REALTIME_SAVE:
                    src = os.path.join(PATH_TO_WATCH, name)
                    tar = os.path.join(BACKUP_PATH, name)
                    logging.debug('Copying {} ...'.format(src))
                    copy_time = time.time()
                    tar_dir = os.path.dirname(tar)
                    try:  # If for whatever reason the path to file doesn't exist, create it.
                        os.makedirs(tar_dir)
                    except OSError as e:
                        if not os.path.isdir(tar_dir):  # An already existing folder is the normal case.
                            logging.warning('Could not create folder {}: {}'.format(tar_dir, e))
                    copy(src, tar)
                    end_time = time.time() - copy_time
                    if logging.getLogger().isEnabledFor(logging.DEBUG):
                        try:
                            size_kb = os.stat(src).st_size / 1000.0
                        except OSError:
                            # The source may be gone by now; a log line must not abort the batch.
                            size_kb = 'unknown'
                        logging.debug('{}s ({} kB) to copy {}'.format(round(end_time, 2), size_kb, src.split('\\', 4)[-1]))
                else:
                    big_files.add(name)

            if time.time() - start > INTERVAL:
                if len(active_children()) != 0:
                    logging.warning('Previous backup hasn\'t finished! Active backups: {}'.format(active_children()))
                start = time.time()
                p = Process(target=copy_big, args=(big_files, PATH_TO_WATCH, BACKUP_PATH))
                logging.debug('Running periodic backup for {} files.'.format(len(big_files)))
                p.start()
                big_files.clear()

        except Exception:
            # Exception rather than a bare except, so Ctrl+C and sys.exit() can still stop the loop.
            logging.warning('An error has occurred while backing up. Some files may not have been backed up:', exc_info=True)
