#! /usr/bin/env python3
'''Blog related pages'''

import re
import datetime
import json
import urllib.parse
import xml.sax.saxutils as saxutils
import uuid

from flask import request, abort, make_response, redirect

from .common import Jinja, Shortener, Expander
from .logging import debug
from .db import (
    get_entries, get_entry, increment_video_views,
    get_source, get_all_sources, get_source_videos,
    cache_tags, get_videos_per_tag,
    validate_params, get_clean_params,
    db_hosts
)


class Redirection:
    '''Return value used by handlers to ask the router for a redirect.'''

    def __init__(self, url):
        '''Remember the url the client should be sent to.'''
        self.url = url


class Page:
    '''Blog related pages'''
    # default number of entries per page, keyed by page type; get() uses it
    # unless the request carries a usable 'limit'
    page_size = {
        'gallery': 100,
    }

    # query variables passed as accepted_vars to validate_params() in get()
    _accepted_vars = ['order', 'sort_by', 'since', 'until', 'names', 'tags',
                      'source', 'hosts', 'limit', 'tags_op']
    # vars that go into links to other blog pages
    _clean_vars = ['order', 'sort_by', 'since', 'until', 'names', 'tags',
                   'source', 'hosts', 'limit', 'tags_op']

    # matches from the first '&' onwards; redirect() uses it to drop the extra
    # hosts from a shortened url
    _extra_hosts_regex = re.compile('&.*$')

    @staticmethod
    def get(page_number=None, tags=None, dates=None, source=None,
            page_type=None, params=None, _autoplay_video=None,
            _update_url=None, _is_random=None):
        '''Handle page request.

        Arguments:
            page_number: zero based page index (anything int() accepts); a
                falsy value means the first page.
            tags, dates, source: filters taken from the url path; they are
                merged into params by merge_arguments().
            page_type: key into Page.page_size and suffix of the template
                name; defaults to 'gallery'.
            params: dict with the query parameters. NOTE that it is modified
                in place (the links/no_video/autoplay/videos flags are
                removed and the path filters are merged into it).
            _autoplay_video: name of the video to autoplay; also turns
                autoplay on.
            _update_url: passed to the template as 'new_location'.
            _is_random: when truthy, forces sort_by=random, limit=20 and
                autoplay.

        Returns a (template name, template values) pair.
        '''

        if params is None:
            params = {}

        # default to the gallery
        if not page_type:
            page_type = 'gallery'

        if not page_number:
            page_number = 0
        else:
            page_number = int(page_number)

        # add no_video to watch urls if the user asked for it
        # (only affects the gallery)
        # NOTE that only the presence of the variable matters, not its value
        add_no_video = False
        for var in ['links', 'no_video']:
            if var in params:
                add_no_video = True
                del params[var]

        # autoplay videos if user requested it
        autoplay = False
        for var in ['autoplay', 'videos']:
            if var in params:
                autoplay = True
                del params[var]

        if _is_random:
            params['sort_by'] = 'random'
            params['limit'] = '20'
            autoplay = True

        # set which video to autoplay, if available
        autoplay_video = None
        if _autoplay_video:
            autoplay = True
            autoplay_video = _autoplay_video

        new_location = None
        if _update_url:
            new_location = _update_url

        # prepare query parameters
        path_args = {'tags': tags, 'dates': dates, 'source': source}
        params = Page.merge_arguments(path_args, params)
        params = validate_params(params, strict=False,
                                 accepted_vars=Page._accepted_vars)

        # select only one page
        page_size = Page.page_size[page_type]
        # if user set a limit, try to use that as page size
        if 'limit' in params and params['limit']:
            try:
                page_size = int(params['limit'])
            except ValueError:
                # unusable limit: keep the default page size for this type
                pass
        params['offset'] = page_size*page_number
        # NOTE that we request one more entry, so we can determine if there are
        # more entries after the current page
        params['limit'] = page_size + 1

        entries = list(get_entries(**params))

        # restore the real page size, since params is also used to build the
        # page links below
        params['limit'] -= 1

        # NOTE that tags should be cached before the entries are expanded
        cache_tags(entries)
        Expander.expand_entries(entries)

        # determine if there are more entries after this page
        has_more = False
        if len(entries) > page_size:
            entries = entries[:page_size]
            has_more = True

        # build urls for links in the page
        page_links = Page.get_page_links(page_number, page_type, params, has_more)

        # set page title, when applicable
        # NOTE(review): this assumes validate_params() always returns the
        # 'tags', 'source', 'hosts', 'since' and 'until' keys, with 'tags' and
        # 'hosts' as lists of strings -- confirm against db.validate_params
        page_title = None
        if params['tags']:
            page_title = ', '.join(params['tags']).replace('_', ' ')
        elif params['source'] and entries:
            page_title = entries[0]['source_text']
        elif params['hosts']:
            hosts_with_slashes = [u'{}/'.format(host) for host in params['hosts']]
            page_title = ', '.join(hosts_with_slashes)
        elif params['since'] or params['until']:
            # NOTE that this reuses (shadows) the dates argument, which is no
            # longer needed at this point
            dates = []
            for key in ['since', 'until']:
                if params[key]:
                    dates.append(u'{} {}'.format(key, params[key]))
            page_title = ', '.join(dates)

        template_values = {
            'entries': entries,
            # NOTE that these are the tags from the url path only, not the
            # merged ones in params
            'tags': tags,
            'page_title': page_title,
            'page_links': page_links,
            'add_no_video': add_no_video,
            'autoplay': autoplay,
            'autoplay_video': autoplay_video,
            'new_location': new_location,
        }

        template = 'blog/' + page_type

        return (template, template_values)

    @classmethod
    def check_source(cls, **kwargs):
        '''Serve a source page, redirecting first if the shortlink is outdated.

        kwargs must contain 'source' (the shortlink from the url); all of
        kwargs is handed over to get() when no redirect is needed. Aborts
        with 404 when the source is unknown.
        '''
        requested = kwargs['source']
        source = get_source(requested)
        if not source:
            abort(404)

        current = source['shortlink']
        if current != requested:
            # old shortlink: point the client at the current one
            target = request.url.replace(
                f'/blog/{requested}', f'/blog/{current}')

            # only redirect if the url actually changed, otherwise we would
            # loop forever
            if target != request.url:
                return Redirection(target)

        return cls.get(**kwargs)

    @staticmethod
    def merge_arguments(path, params):
        '''Marge tags and dates specified via the path with those specified via
        parameters.

        In the case of dates, only the first date will be taken into
        consideration. In other words, we keep backward compatibility for
        /YYMMDD/ paths, but not for /YYMMDD,YYMMDD,...
        '''

        if 'tags' in path and path['tags']:
            if not ('tags' in params and params['tags']):
                params['tags'] = path['tags']
            else:
                # split both lists, add non duplicate value and rejoin
                tags = params['tags'].split(',')
                for tag in path['tags'].split(','):
                    if not tag in tags:
                        tags.append(tag)
                params['tags'] = ','.join(tags)

        if 'dates' in path and path['dates']:
            dates = path['dates'].split(',')
            date = dates[0]

            # generate since and to
            since = datetime.datetime.strptime(date, '%y%m%d')
            until = since + datetime.timedelta(days=1)

            # a date in path overrides any parameters which may have been passed
            params['since'] = str(since)
            params['until'] = str(until)

        if 'source' in path and path['source']:
            params['source'] = path['source']

        return params

    @staticmethod
    def _get_page_base_path(page_type):
        # since gallery is the default, there's no need to include it in
        # the link
        if page_type == 'gallery':
            return '/blog/'
        return '/blog/{}/'.format(page_type)

    @staticmethod
    def get_page_links(page_number, page_type, params, has_more):
        '''Build the urls used for navigation inside a page.

        The result always holds one entry per page type ('gallery', 'rss'),
        set to None for the current type. When there is more than one page in
        total it also holds 'prev_page', 'next_page' (None when there is no
        such page) and 'top_page'.

        Links to other page types purposefully leave out the page number:
        page sizes differ between types, so the number would have to be
        recalculated for each of them and still would not line up.
        '''

        # parameters that still have these values are not worth putting in
        # the links
        defaults = {
            'order': 'descending',
            'sort_by': 'posted_at',
            'limit': Page.page_size[page_type],
            'tags_op': 'or',
        }
        useful = get_clean_params(params, Page._clean_vars, default=defaults)
        query = '?{}'.format(Page.urlencode(useful)) if useful else ''

        # one link per page type, except for the type being displayed
        links = {
            kind: (None if kind == page_type
                   else Page._get_page_base_path(kind) + query)
            for kind in ('gallery', 'rss')
        }

        # the plain gallery gets /blog/page/N urls, which helps robots index
        # all videos efficiently
        if page_type == 'gallery' and not query:
            numbered = '/blog/page/{page}'
        else:
            numbered = Page._get_page_base_path(page_type) + '{page}'

        # a single page needs no pagination links
        if page_number <= 0 and not has_more:
            return links

        def link_to(number):
            return numbered.format(page=number) + query

        links['prev_page'] = link_to(page_number - 1) if page_number > 0 else None
        links['next_page'] = link_to(page_number + 1) if has_more else None
        links['top_page'] = link_to(0)

        return links

    @staticmethod
    def urlencode(params):
        '''Encode url'''
        return urllib.parse.urlencode(params)

    @staticmethod
    def _redirect_to_page_containing(entry):
        '''Show a blog page that contains the video, with the video on autoplay.

        The source page is used when the source has more than one video.
        Otherwise the most specific tag with more than one video is used, and
        when no tag qualifies we fall back to the source page anyway.
        '''

        shortlink = entry['source_shortlink']
        url_by_source = '/blog/{}'.format(shortlink)

        def source_page():
            return Page.get(
                source=shortlink, _autoplay_video=entry['name'],
                _update_url=url_by_source
            )

        if len(get_source_videos(entry['source_id'])) > 1:
            return source_page()

        cache_tags([entry])
        if entry['tags']:
            videos_per_tag = get_videos_per_tag(entry['tags'].split(', '))
        else:
            videos_per_tag = {}

        # the most specific tag is the one with the fewest videos, as long as
        # it has more than one
        #
        # tags spanning more than one page are ruled out, because the video
        # might not show up on the first page
        gallery_size = Page.page_size['gallery']
        candidates = [(tag, count) for tag, count in videos_per_tag.items()
                      if 1 < count < gallery_size]
        best_tag = None
        if candidates:
            # min() keeps the first of equally small tags
            best_tag = min(candidates, key=lambda pair: pair[1])[0]

        if not best_tag:
            # no appropriate tag found, fallback to source
            return source_page()

        debug('source only has one video, using tag ‘%s’ instead',
              best_tag)
        return Page.get(
            tags=best_tag, _autoplay_video=entry['name'],
            _update_url='/blog/tag/{}'.format(best_tag)
        )

    @staticmethod
    def redirect(video=None, params=None):
        '''Handle a url that names a video directly.

        Videos that are in the blog are shown inside an appropriate blog
        page; anything else is redirected to the new url scheme (/#ab/cde).
        Without a video we redirect to the front page.
        '''
        # pylint: disable = unused-argument

        if not video:
            return Redirection('/')

        # round trip through the shortener to check that video is a valid
        # shortenable or shortened url
        expanded = Shortener.expand_urls(video)
        reshortened = Shortener.shorten_urls(expanded) if expanded else None

        if not reshortened:
            # video not shortenable, just redirect as is
            return Redirection('/#{}'.format(video))

        # the front page doesn't understand lists with multiple hosts, so
        # keep only the first one (i.e. ab/cde,fgh&ij/klm -> ab/cde,fgh)
        reshortened = Page._extra_hosts_regex.sub('', reshortened)

        entry = get_entry(reshortened)
        if entry:
            return Page._redirect_to_page_containing(entry)

        # not in the blog: use the reshortened url, so that we don't forward
        # urls like /ab/cde.mp4&ab/cde.webm and so on
        return Redirection('/#{}'.format(reshortened))


class Json:
    '''API for retrieving video information from the DB'''

    @staticmethod
    def get(params=None):
        '''Answer an API request with the matching entries, as JSON.

        The reply always has the keys 'success', 'error' and 'entries'. When
        the parameters are rejected, 'error' carries the message and
        'entries' is null.
        '''
        def as_json(success, error, entries):
            reply = {'success': success, 'error': error, 'entries': entries}
            return json.dumps(reply, indent=2)

        # validate first, so that we fail quickly, before retrieving anything
        try:
            params = validate_params(params)
        except ValueError as error:
            return as_json(False, str(error), None)

        entries = list(get_entries(**params))

        # NOTE that tags should be cached before the entries are expanded
        cache_tags(entries)
        Expander.expand_entries(entries)

        return as_json(True, None, entries)

    @staticmethod
    def post(db_action=None, params=None):
        '''Handle API POST request.

        The only supported db_action is 'count_view', which increments the
        view counter of the video named by params['name'].

        Aborts with 404 when the action is unknown or no name was given
        (including when params is None). Otherwise returns a JSON string:
        on success it has 'success', 'error' and 'views' (the value returned
        by increment_video_views()); when that value is falsy it has
        'success' set to false and a 'video not found' error.
        '''
        if db_action != 'count_view':
            abort(404)

        # params defaults to None, so it has to be checked before the lookup
        name = params.get('name') if params else None
        if not name:
            abort(404)

        view_count = increment_video_views(name)

        if not view_count:
            return_dict = {
                'success': False,
                'error': 'video not found: {}'.format(name),
            }
            return json.dumps(return_dict, indent=2)

        return_dict = {'success': True, 'error': None, 'views': view_count}
        return json.dumps(return_dict, indent=2)

    @staticmethod
    def video(params=None):
        '''Return the shortened name and the expanded urls of a video, as JSON.

        Aborts with 400 when no name was given, and with 404 when the name
        can be neither expanded nor shortened.
        '''
        name = params.get('name') if params else None
        if not name:
            abort(400)

        expanded_urls = Shortener.expand_urls(name)
        if expanded_urls:
            shortened_url = Shortener.shorten_urls(expanded_urls)
        else:
            shortened_url = None

        if not shortened_url and not expanded_urls:
            # not a valid url
            abort(404)

        reply = {
            'name': shortened_url,
            'urls': expanded_urls,
        }
        return json.dumps(reply)

class Logger:
    '''Request logger. Meant as a debugging aid for UpdateThumbs.'''

    @staticmethod
    def get(params):
        '''Write the parameters of the request to the debug log'''
        debug('logger: got arguments: %s', params)
        return None


class Tag:
    '''Page listing every tag in the DB'''

    @staticmethod
    def get(params=None):
        '''Return the tags template together with the videos-per-tag data'''
        # pylint: disable = unused-argument

        template_values = {'tags': get_videos_per_tag()}
        return ('blog/tags', template_values)


class SourcesList:
    '''Page listing every source in the DB'''

    @staticmethod
    def get(params=None):
        '''Return the sources template together with all the sources'''
        # pylint: disable = unused-argument

        template_values = {'sources': get_all_sources()}
        return ('blog/sources', template_values)


class RSS:
    '''RSS feeds'''

    # query variables passed as accepted_vars to validate_params() in get()
    _accepted_vars = ['order', 'since', 'until', 'offset', 'limit', 'names',
                      'tags', 'source', 'hosts', 'tags_op']

    # base url handed to the feed templates as 'blog_url'
    _blog_url = 'https://loopvid.kastden.org/blog'

    # default for the 'limit' parameter (see get())
    default_size = 50

    @staticmethod
    def get(params=None):
        '''Build the RSS feed for the entries selected by params.

        Every entry gets three extra fields for the feed: 'description' (the
        rendered and XML escaped description template), 'guid' and
        'rfc822_date'. Returns a (template name, template values) pair.
        '''

        params = validate_params(
            params, defaults={'limit': RSS.default_size}, strict=False,
            accepted_vars=RSS._accepted_vars
        )

        entries = list(get_entries(**params))

        # NOTE that tags should be cached before the entries are expanded
        cache_tags(entries)
        Expander.expand_entries(entries)

        for entry in entries:
            rendered = Jinja.render('blog/rss/description', {
                'blog_url': RSS._blog_url,
                'entry': entry,
            })
            entry['description'] = saxutils.escape(rendered)

            entry['guid'] = RSS.guid(entry['watch_url'])
            entry['rfc822_date'] = RSS.rfc822_date(entry['posted_at'])

        feed_values = {
            'blog_url': RSS._blog_url,
            'entries': entries,
        }

        return ('blog/rss/feed', feed_values)

    @staticmethod
    def rfc822_date(date_str):
        '''Convert post time to RFC822 format'''
        date = datetime.datetime.strptime(date_str, '%Y-%m-%d %H:%M:%S.%f')
        return date.strftime('%a, %d %b %Y %H:%M:%S GMT')

    @staticmethod
    def guid(url):
        '''Generate GUID based on an entry's url'''
        guid = uuid.uuid5(uuid.NAMESPACE_URL, url)
        return str(guid)


class Hosts:
    '''List of videos from one host'''

    @staticmethod
    def get(host=None, params=None):
        '''List videos from one host.

        host is the host code taken from the url; params is the dict of query
        parameters (a missing one is treated as empty). NOTE that params is
        modified in place: its 'hosts' entry is set to host.

        Aborts with 404 when host is not in db_hosts. Otherwise returns
        whatever Page.get() returns for the resulting parameters.
        '''
        # most hosts aren't posted to the blog, so we shouldn't confuse the
        # user by redirecting them to a blank page
        if host not in db_hosts:
            abort(404)

        # params defaults to None, which can't take the 'hosts' entry
        if params is None:
            params = {}

        params['hosts'] = host
        return Page.get(params=params)


class Router:
    '''Routes requests to the appropriate handler classes.

    This is a proper-regex-capable replacement for webapp2's terrible routing.
    '''

    # runs of two or more slashes, collapsed into one before matching
    _double_slash_regex = re.compile('/{2,}')

    # (regex, handler) pairs, tried in order by route(); the first regex that
    # matches the path wins, and its named groups become the keyword
    # arguments of the handler
    _handlers = [
        # blog page

        # notice that /blog/gallery, /blog/videos and /blog/list all match the
        # same patterns as plain /blog (i.e. the gallery), to maintain
        # compatibility with old links

        # source
        # note that source must come first because sources with only numbers
        # look like page numbers
        (re.compile(
            r'^/blog(?:/(?:gallery|videos|list))?/(?P<source>[0-9A-Fa-f]{8})/?$'),
         Page.check_source),

        # tags before dates
        (re.compile(
            r'^/blog(?:/(?:gallery|videos|list))?(?:/tag/(?P<tags>[^/]+))?'
            r'(?:/(?P<dates>\d{6}(?:,\d{6})*))?(?:/(?P<page_number>\d+))?/?$'),
         Page.get),
        # dates before tags
        (re.compile(
            r'^/blog(?:/(?:gallery|videos|list))?'
            r'(?:/(?P<dates>\d{6}(?:,\d{6})*))?(?:/tag/(?P<tags>[^/]+))?'
            r'(?:/(?P<page_number>\d+))?/?$'),
         Page.get),

        # dedicated page number url
        (re.compile(r'^/blog/page/(?P<page_number>\d+)/?$'), Page.get),
        # dedicated random page
        # NOTE(review): unlike the other routes, the trailing slash is
        # mandatory here -- confirm that this is intentional
        (re.compile(r'^/blog/(?P<_is_random>random)/$'), Page.get),

        # tags
        (re.compile(r'^/blog/tags?/?$'), Tag.get),
        # sources
        (re.compile(r'^/blog/sources/?$'), SourcesList.get),
        # json
        (re.compile(r'^/blog/json/?$'), Json.get),
        (re.compile(r'^/blog/json/(?P<db_action>count_view)/?$'), Json.post),
        (re.compile(r'^/blog/json/video/?$'), Json.video),
        # rss
        (re.compile(r'^/blog/rss/?$'), RSS.get),
        # logger
        (re.compile(r'^/blog/log/?$'), Logger.get),
        # videos of one host (two character host code, with or without /blog)
        (re.compile(r'^/(?:blog/)?(?P<host>[a-z][a-z0-9])/?$'), Hosts.get),
        # redirect (host code followed by the rest of the video name)
        (re.compile(r'^/(?P<video>[a-z][a-z0-9]/.+)$'), Page.redirect),
    ]

    # handlers that return something other than html (for setting Content-Type
    # header)
    _handler_output = {
        Json.get: 'application/json',
        Json.post: 'application/json',
        Json.video: 'application/json',
        RSS.get: 'text/xml',
    }

    # alternative names for query parameters, mapped to the name that the
    # handlers expect (applied by get_params())
    _aliases = {
        'from': 'since',
        'to': 'until',
        'sources': 'source',
    }

    def get(self, path=''):
        '''Entry point for requests: hand the path over to route()'''
        response = self.route(path=path)
        return response

    def route(self, path='/'):
        '''Find the handler for path, call it and build the response.

        The handler receives the named groups of its regex plus 'params'
        (see get_params()). What it returns decides the response:
        a Redirection becomes a 301 redirect, a (template, values) pair is
        rendered with Jinja, None becomes an empty body and anything else is
        sent as is. Aborts with 404 when no handler matches.
        '''
        # pylint: disable = comparison-with-callable
        debug('routing path %s', path)

        # get rid of double slashes
        path = Router._double_slash_regex.sub('/', path)

        # the first regex that matches decides the handler
        handler = None
        for regex, candidate in Router._handlers:
            match = regex.match(path)
            if match:
                handler = candidate
                break
        else:
            debug('no match')
            abort(404)

        debug('match=%s', match)

        handler_args = match.groupdict()
        handler_args['params'] = self.get_params()

        output = handler(**handler_args)

        if isinstance(output, Redirection):
            return redirect(output.url, 301)

        # a pair (template, values) has to be rendered before writing,
        # anything else is written as is
        if isinstance(output, tuple):
            output = Jinja.render(*output)
        elif output is None:
            output = ''

        response = make_response(output)

        # only handlers that don't return html have an entry here
        content_type = Router._handler_output.get(handler)
        if content_type:
            response.headers['Content-Type'] = content_type

        return response

    @staticmethod
    def get_params():
        '''Returns dict with all parameters set by the user.

        Parameters given under an alias (see Router._aliases) are stored
        under the original key instead; when both the alias and the original
        key are present, the value of the alias wins.
        '''
        params = dict(request.values)

        # replace aliases with the original key
        #
        # NOTE that we loop over the aliases and not over params: adding and
        # removing keys of a dict while iterating over it raises RuntimeError
        for alias, original_key in Router._aliases.items():
            if alias in params:
                params[original_key] = params.pop(alias)

        return params
