Source code for ferenda.sources.general.static

# -*- coding: utf-8 -*-
from __future__ import (absolute_import, division,
                        print_function, unicode_literals)
from builtins import *

import os

from bs4 import BeautifulSoup
from docutils.core import publish_string
from rdflib import URIRef, Graph, Literal, Namespace
from rdflib.namespace import DCTERMS, RDF
OLO = Namespace("http://purl.org/ontology/olo/core#")
PROV = Namespace("http://www.w3.org/ns/prov#")

from ferenda import DocumentRepository
from ferenda import DocumentStore
from ferenda import util
from ferenda.decorators import managedparsing
from ferenda import elements
from ferenda.elements.html import elements_from_soup

class StaticStore(DocumentStore):

    """Customized DocumentStore that looks for all "downloaded" resources
    from the specified ``staticdir``. If ``staticdir`` isn't provided
    or doesn't exist, falls back to a collection of package resources
    (under ferenda/res/static). Parsed, generated etc files
    are handled like normal, ie stored under
    ``[datadir]/static/{parsed,distilled,generated,...}/``
    """

    def downloaded_path(self, basefile, version=None, attachment=None):
        segments = [self.staticdir,
                    self.basefile_to_pathfrag(basefile) + self.downloaded_suffixes[0]]
        return "/".join(segments).replace("/", os.sep)

    def list_basefiles_for(self, action, basedir=None, force=True):
        if action == "parse":
            for x in util.list_dirs(self.staticdir, self.downloaded_suffixes[0]):
                pathfrag = x[len(self.staticdir) + 1:-len(self.downloaded_suffixes[0])]
                yield self.pathfrag_to_basefile(pathfrag)
        else:
            for x in super(StaticStore, self).list_basefiles_for(action, basedir, force):
                yield x
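    # Illustration (hypothetical directory contents): if staticdir holds
    # about.rst and contact.rst, list_basefiles_for("parse") yields the
    # basefiles "about" and "contact"; any other action falls back to the
    # normal DocumentStore listing under [datadir]/static/.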


class Static(DocumentRepository):

    """Generates documents from your own ``.rst`` files

    The primary purpose of this docrepo is to provide a small set of
    static pages for a complete ferenda-based web site, like "About
    us", "Contact information", "Terms of service" or whatever else
    you need. The ``download`` step of this docrepo does not do
    anything, and its ``parse`` step reads ReStructuredText (``.rst``)
    files from a local directory and converts them into XHTML+RDFa.
    From that point on, it works just like any other docrepo.

    After enabling this, you should set the configuration parameter
    ``staticdir`` to the path of a directory where you keep your
    ``.rst`` files::

        [static]
        class = ferenda.sources.general.Static
        staticdir = /var/www/mysite/static/rst

    .. note::

       If this configuration parameter is not set, this docrepo will
       use a small set of generic static pages, stored under
       ``ferenda/res/static-pages`` in the distribution. To get
       started, you can just copy this directory and set ``staticdir``
       to point at your copy.

    If an ``.rst`` file has a special ``:footer-order:`` directive
    directly underneath the main title, it will result in a link in
    the site footer. The link text will be the title of the document,
    i.e. the first header in the ``.rst`` file. The order of those
    links is controlled by the value of ``:footer-order:``, which
    should be an integer.

    """
    alias = "static"
    downloaded_suffix = ".rst"
    documentstore_class = StaticStore
    sparql_annotations = None

    # urls become of the form "http://localhost:8000/static/about"

    def __init__(self, config=None, **kwargs):
        super(Static, self).__init__(config, **kwargs)
        if 'staticdir' in self.config:
            staticdir = self.config.staticdir
            assert os.path.exists(staticdir), "%s does not exist" % staticdir
        else:
            p = self.resourceloader.filename('static/README')
            staticdir = os.path.dirname(p)
        self.store.staticdir = staticdir

    @property
    def config(self):
        return self._config

    @config.setter
    def config(self, config):
        staticdir = self.store.staticdir
        # FIXME: we reimplement this method instead of calling the
        # super()class implementation because I have no idea how to do
        # that with a @property decorator. We need to re-set the
        # store.staticdir property, hence the need for this subclass
        # implementation.
        self._config = config
        self.store = self.documentstore_class(
            config.datadir + os.sep + self.alias,
            storage_policy=self.storage_policy,
            compression=self.config.compress)
        self.store.staticdir = staticdir

    def download(self):
        pass

    @managedparsing
    def parse(self, doc):
        source = util.readfile(self.store.downloaded_path(doc.basefile))
        html = publish_string(source, writer_name="html")
        soup = BeautifulSoup(html, "lxml")
        docinfo = soup.find("table", "docinfo")
        docuri = URIRef(doc.uri)
        if docinfo:
            # this is where our custom metadata goes
            for row in docinfo.find_all("tr", "field"):
                key, val = row.th.text.strip(), row.td.text.strip()
                if key == 'footer-order:':
                    doc.meta.add((docuri, OLO['index'], Literal(int(val))))
                else:
                    self.log.warning("%s: Unknown metadata directive %s (%s)" %
                                     (doc.basefile, key, val))
            # we don't need these in the final result
            docinfo.decompose()
            soup.find("h1", "title").decompose()
        doc.body = elements_from_soup(soup.body)
        doc.meta.add((docuri, DCTERMS.title,
                      Literal(soup.title.text, doc.lang)))
        doc.meta.add((docuri, PROV.wasGeneratedBy,
                      Literal(self.qualified_class_name())))
        doc.meta.add((docuri, RDF.type, self.rdf_type))
        self.parse_entry_update(doc)
        return True

    def toc(self, otherrepos=[]):
        pass

    def news(self, otherrepos=[]):
        pass

    def frontpage_content(self, primary=False):
        pass

    def tabs(self):
        if os.path.exists(self.store.parsed_path("about")):
            return [("About", self.canonical_uri("about"))]
        else:
            return []

    def footer(self):
        res = {}
        for basefile in self.store.list_basefiles_for("generate"):
            uri = self.canonical_uri(basefile)
            g = Graph()
            g.parse(self.store.distilled_path(basefile))
            # only return those files that have olo:index metadata, in
            # that order
            if g.value(URIRef(uri), OLO['index']):
                title = g.value(URIRef(uri), self.ns['dcterms'].title).toPython()
                if not title:
                    title = basefile
                res[int(g.value(URIRef(uri), OLO['index']))] = (title, uri)
        return [res[x] for x in sorted(res)]
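
# A sketch of an ``.rst`` source file for this repo (the file name and wording
# are hypothetical; the ``:footer-order:`` field goes directly under the main
# title, as described in the Static docstring above):
#
#   About us
#   ========
#
#   :footer-order: 1
#
#   We are a small organisation that ...
#
# Saved as <staticdir>/about.rst, parse() records olo:index 1 for the
# document, and footer() returns its title and URI among the footer links,
# ordered by that index.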