# -*- coding: utf-8 -*-
from __future__ import (absolute_import, division,
print_function, unicode_literals)
from builtins import *
from io import BytesIO
import inspect
import json
import logging
import os
from lxml import etree
from lxml.builder import ElementMaker
from rdflib import URIRef, Literal, BNode, Graph, Namespace, RDF, RDFS
from rdflib.namespace import FOAF, SKOS
BIBO = Namespace("http://purl.org/ontology/bibo/")
from layeredconfig import LayeredConfig, Defaults
from ferenda import DocumentRepository, ResourceLoader
from ferenda import util, errors
[docs]class Resources(object):
"""Creates and manages various assets/resources needed for web serving.
"""
def __init__(self, repos, resourcedir, **kwargs):
    """Set up configuration, logging and resource lookup for a set of repos.

    :param repos: The docrepo instances whose resources should be managed
    :param resourcedir: The directory under which generated resources
                        (css, js, img, api files, resources.xml) are placed
    :param kwargs: Option overrides, layered on top of the defaults
                   returned by ``DocumentRepository.get_default_options()``
    """
    # FIXME: document what kwargs could be (particularly 'combineresources')
    self.repos = repos
    self.resourcedir = resourcedir

    options = DocumentRepository.get_default_options()
    options.update(kwargs)
    self.config = LayeredConfig(Defaults(options))

    # We deliberately do not call ferenda.manager.setup_logger() here:
    # it alters the logging level of the root logger, and we'd rather
    # log to our own named logger than to the root logger.
    self.log = logging.getLogger("ferenda.resources")

    # FIXME: How should we set up a global loadpath from the
    # individual repos?
    loadpath = ["."]  # cwd always has priority -- makes sense?
    for repo in repos:
        for candidate in ResourceLoader.make_loadpath(repo):
            # keep the first occurrence of every path, in repo order
            if candidate in loadpath:
                continue
            loadpath.append(candidate)
    self.resourceloader = ResourceLoader(*loadpath)
def make(self,
         css=True,
         js=True,
         img=True,
         xml=True,
         api=None):
    """Create the selected kinds of resources and report what was made.

    :param css: Whether to create CSS resources (``make_css``)
    :param js: Whether to create JS resources (``make_js``)
    :param img: Whether to create image resources (``make_img``)
    :param xml: Whether to create resources.xml (``make_resources_xml``)
    :param api: Whether to create API files (``make_api_files``). If
                ``None``, API files are created unless
                ``config.staticsite`` is set.
    :returns: A dict with one key per created kind ('css', 'js', 'img',
              'xml', 'json'), each holding the list returned by the
              corresponding ``make_*`` method
    :rtype: dict
    """
    if api is None:
        api = not self.config.staticsite

    made = {}
    if css:
        made['css'] = self.make_css()
    if js:
        made['js'] = self.make_js()
    if img:
        made['img'] = self.make_img()
    if xml:
        # resources.xml lists whatever css/js was created above (if any)
        made['xml'] = self.make_resources_xml(made.get('css', []),
                                              made.get('js', []))
    if api:
        made['json'] = self.make_api_files()

    if os.sep != "\\":
        return made

    # finally, normalize paths according to os.path.sep conventions
    # (external URLs are left untouched)
    remote = ("http://", "https://")
    return {kind: [p if p.startswith(remote) else p.replace('/', os.sep)
                   for p in paths]
            for kind, paths in made.items()}
def make_css(self):
    """Copy (or combine) the CSS files named by the 'cssfiles' option.

    When ``config.combineresources`` is set, a combined file
    ``css/combined.css`` under the resource directory is requested and
    ``cssmin.cssmin`` is passed along as the function to run on the
    combined text.

    :returns: Whatever ``_make_files`` returns (a list of URL paths)
    """
    import cssmin
    destdir = self.resourcedir + os.sep + 'css'
    if self.config.combineresources:
        combined = os.sep.join([self.resourcedir, 'css', 'combined.css'])
    else:
        combined = None
    return self._make_files('cssfiles', destdir, combined, cssmin.cssmin)
def make_js(self):
    """Copy (or combine) the JS files named by the 'jsfiles' option.

    When ``config.combineresources`` is set, a combined file
    ``js/combined.js`` under the resource directory is requested and
    ``jsmin.jsmin`` is passed along as the function to run on the
    combined text.

    :returns: Whatever ``_make_files`` returns (a list of URL paths)
    """
    # slimit provides better perf, but isn't py3 compatible
    # import slimit
    # js = slimit.minify(
    #     jsbuffer.getvalue(), mangle=True, mangle_toplevel=True)
    import jsmin
    destdir = self.resourcedir + os.sep + 'js'
    if self.config.combineresources:
        combined = os.sep.join([self.resourcedir, 'js', 'combined.js'])
    else:
        combined = None
    return self._make_files('jsfiles', destdir, combined, jsmin.jsmin)
def make_img(self):
    """Copy the files named by the 'imgfiles' option.

    Images are never combined, so no combined file or combine function
    is passed on.

    :returns: Whatever ``_make_files`` returns (a list of URL paths)
    """
    imgdir = os.sep.join([self.resourcedir, 'img'])
    return self._make_files('imgfiles', imgdir)
def make_resources_xml(self, cssfiles, jsfiles):
    """Write ``resources.xml`` in the resource directory.

    The document has a ``configuration`` root holding the site name,
    description and url from the config, the tab and footer links
    collected from the repos, and the given stylesheets and
    javascripts. Unless ``config.staticsite`` is set, a ``search``
    element with the search endpoint is appended.

    :param cssfiles: URL paths to wrap as ``link`` elements
    :param jsfiles: URL paths to wrap as ``script`` elements
    :returns: A one-element list with the URL path of the written file
    :rtype: list
    """
    E = ElementMaker()  # namespace = None, nsmap={None: ...}
    cfg = self.config
    children = [
        E.sitename(cfg.sitename),
        E.sitedescription(cfg.sitedescription),
        E.url(cfg.url),
        E.tabs(*self._links('tabs')),
        E.footerlinks(*self._links('footer')),
        E.stylesheets(*self._li_wrap(cssfiles, 'link', 'href',
                                     rel="stylesheet")),
        # a single-space text node is passed for the script elements
        E.javascripts(*self._li_wrap(jsfiles, 'script', 'src', text=" ")),
    ]
    root = E.configuration(*children)
    if not cfg.staticsite:
        root.append(E.search(E.endpoint(cfg.searchendpoint)))

    outfile = self.resourcedir + os.sep + "resources.xml"
    serialized = etree.tostring(root, encoding="utf-8", pretty_print=True)
    util.writefile(outfile, serialized.decode("utf-8"))
    self.log.info("Wrote %s" % outfile)
    return [self._filepath_to_urlpath(outfile, 1)]
# FIXME: When creating <script> elements, must take care not to
# create self-closing tags (like by creating a single space text
# node)
def _li_wrap(self, items, container, attribute, text=None, **kwargs):
elements = []
for item in items:
kwargs[attribute] = item
e = etree.Element(container, **kwargs)
e.text = text
elements.append(e)
return elements
def _links(self, methodname):
E = ElementMaker()
elements = []
for repo in self.repos:
alias = repo.alias
items = getattr(repo, methodname)()
self.log.debug("Adding %(methodname)s from docrepo %(alias)s" %
locals())
elements.extend(self._links_listitems(items))
return elements
def _links_listitems(self, listitems):
    """Convert link tuples into ``li`` elements.

    Each item is either ``(label, url)`` or ``(label, url, sublists)``.
    An item with a truthy url becomes ``<li><a href=url>label</a></li>``,
    otherwise ``<li>label</li>``. If sublists are given, every sublist
    is converted recursively and all resulting elements are placed in a
    single ``ul`` appended to the ``li``.

    :param listitems: Iterable of 2- or 3-tuples as described above
    :returns: One ``li`` element per item
    :rtype: list
    """
    E = ElementMaker()
    elements = []
    for item in listitems:
        if len(item) == 2:
            label, url = item
            sublists = None
        else:
            label, url, sublists = item
        self.log.debug(
            " - %(label)s (%(url)s)" % {"label": label, "url": url})
        li = E.li(E.a({'href': url}, label)) if url else E.li(label)
        if sublists:
            children = [child
                        for sublist in sublists
                        for child in self._links_listitems(sublist)]
            li.append(E.ul(*children))
        elements.append(li)
    return elements
def _make_files(self, option, filedir, combinefile=None, combinefunc=None):
urls = []
buf = BytesIO()
processed = set()
# eg. self.config.cssfiles
if getattr(self.config, option): # it's possible to set eg
# cssfiles=None when
# creating the Resources
# object
for f in getattr(self.config, option):
urls.append(self._process_file(f, buf, filedir, "ferenda.ini"))
processed.add(f)
for repo in self.repos:
# FIXME: create a more generic way of optionally
# signalling to a repo that "Hey, now it's time to create
# your resources if you can"
if repo.__class__.__name__ == "SFS" and option == "imgfiles":
self.log.info("calling into SFS._makeimages()")
LayeredConfig.set(repo.config, 'imgfiles', repo._makeimages())
for f in getattr(repo.config, option):
if f in processed:
continue
urls.append(self._process_file(f, buf, filedir, repo.alias))
processed.add(f)
urls = list(filter(None, urls))
if combinefile:
txt = buf.getvalue().decode('utf-8')
util.writefile(combinefile, combinefunc(txt))
return [self._filepath_to_urlpath(combinefile, 2)]
else:
return urls
def _process_file(self, filename, buf, destdir, origin=""):
"""
Helper function to concatenate or copy CSS/JS (optionally
processing them with e.g. Scss) or other files to correct place
under the web root directory.
:param filename: The name (relative to the ferenda package) of the file
:param buf: A buffer into which the contents of the file is written
(if combineresources == True)
:param destdir: The directory into which the file will be copied
(unless combineresources == True)
:param origin: The source of the configuration that specifies this file
:returns: The URL path of the resulting file, relative to the web root
(or None if combineresources == True)
:rtype: str
"""
if filename.startswith("http://") or filename.startswith("https://"):
if self.config.combineresources:
raise errors.ConfigurationError(
"makeresources: Can't use combineresources=True in combination with external js/css URLs (%s)" % filename)
self.log.debug("Using external url %s" % filename)
return filename
try:
fp = self.resourceloader.openfp(filename, binary=True)
except errors.ResourceNotFound:
self.log.warning("file %(filename)s (specified in %(origin)s)"
" doesn't exist" % locals())
return None
(base, ext) = os.path.splitext(filename)
if self.config.combineresources:
self.log.debug("combining %s into buffer" % filename)
d = fp.read()
buf.write(d)
fp.close()
return None
else:
# FIXME: don't copy (at least not log) if the outfile
# already exists.
# self.log.debug("writing %s out to %s" % (filename, destdir))
outfile = destdir + os.sep + os.path.basename(filename)
if (os.path.islink(outfile) and
os.path.relpath(os.path.join(os.path.dirname(outfile),
os.readlink(outfile))) == util.name_from_fp(fp)):
self.log.warning("%s is a symlink to source file %s, won't overwrite" % (outfile, util.name_from_fp(fp)))
else:
util.ensure_dir(outfile)
with open(outfile, "wb") as fp2:
fp2.write(fp.read())
fp.close()
return self._filepath_to_urlpath(outfile, 2)
def make_api_files(self):
    """Create the JSON files used by the API under the resource directory.

    Writes ``api/context.json`` (the JSON-LD context), ``api/common.json``
    and ``api/terms.json``. With ``config.legacyapi`` set, the two
    graphs are rooted at the legacy URL paths, converted with
    ``_convert_legacy_jsonld``, and the "ui" resource directory is
    extracted to ``<resourcedir>/ui``.

    :returns: The URL paths of the three written JSON files
    :rtype: list
    """
    # this should create the following files under resourcedir
    # api/context.json (aliased to /json-ld/context.json if legacyapi)
    # api/terms.json (aliased to /var/terms.json if legacyapi)
    # api/common.json (aliased to /var/common.json if legacyapi)
    # MAYBE api/ui/ - copied from ferenda/res/ui
    files = []
    apidir = os.sep.join([self.resourcedir, "api"])
    context = os.sep.join([apidir, "context.json"])
    if self.config.legacyapi:
        self.log.info("Creating API files for legacyapi")
        contextpath, termspath, commonpath = (
            "/json-ld/context.json", "/var/terms", "/var/common")
    else:
        # FIXME: create correct URL path
        contextpath, termspath, commonpath = (
            "/rsrc/api/context.json",
            "/rsrc/api/terms.json",
            "/rsrc/api/common.json")

    util.ensure_dir(context)
    with open(context, "w") as fp:
        contextdict = self._get_json_context()
        fp.write(json.dumps({"@context": contextdict},
                            separators=(', ', ': '),
                            indent=4, sort_keys=True))
    files.append(self._filepath_to_urlpath(context, 2))

    common = os.sep.join([apidir, "common.json"])
    terms = os.sep.join([apidir, "terms.json"])
    targets = ((common, self._get_common_graph, commonpath),
               (terms, self._get_term_graph, termspath))
    for filename, build_graph, urlpath in targets:
        # urlpath starts with a slash, which is dropped when it is
        # appended to the configured site url
        graphuri = self.config.url + urlpath[1:]
        g = build_graph(graphuri)
        serialized = g.serialize(format="json-ld", context=contextdict,
                                 indent=4)
        d = json.loads(serialized.decode("utf-8"))
        # d might not contain a @context (if contextdict == {}, ie
        # no repos are given)
        if '@context' in d:
            d['@context'] = contextpath
        if self.config.legacyapi:
            d = self._convert_legacy_jsonld(d, graphuri)
        with open(filename, "w") as fp:
            fp.write(json.dumps(d, indent=4, separators=(', ', ': '),
                                sort_keys=True))
        files.append(self._filepath_to_urlpath(filename, 2))

    if self.config.legacyapi:
        # copy ui explorer app to <url>/rsrc/ui/ -- this does not get
        # included in files
        targetdir = os.sep.join([self.resourcedir, "ui"])
        self.resourceloader.extractdir("ui", targetdir)
    return files
def _convert_legacy_jsonld(self, indata, rooturi):
# the json structure should be a top node containing only
# @context, iri (localhost:8000/var/terms), type (foaf:Document)
# and topic - a list of dicts, where each dict looks like:
#
# {"iri" : "referatserie",
# "comment" : "Anger vilken referatserie som referatet eventuellt tillhör.",
# "label" : "Referatserie",
# "type" : "DatatypeProperty"}
out = {}
topics = []
# the property containing the id/uri for the
# record may be under @id or iri, depending on
# whether self.config.legacyapi was in effect for
# _get_json_context()
if self.config.legacyapi:
idfld = 'iri'
else:
idfld = '@id'
# indata might be a mapping containing a list of mappings
# under @graph, or it might just be the actual list.
wantedlist = None
if isinstance(indata, list):
wantedlist = indata
else:
for topkey, topval in indata.items():
if topkey == "@graph":
wantedlist = topval
break
if not wantedlist:
self.log.warning(
"Couldn't find list of mappings in %s, topics will be empty" %
indata)
else:
shortened = {}
for subject in sorted(wantedlist, key=lambda x: x["iri"]):
if subject[idfld] == rooturi:
for key, value in subject.items():
if key in (idfld, 'foaf:topic'):
continue
out[key] = value
else:
for key in subject:
if isinstance(subject[key], list):
# make sure multiple values are sorted for
# the same reason as below
subject[key].sort()
# FIXME: We want to use just the urileaf for
# legacyapi clients (ie Standard instead of
# bibo:Standard) but to be proper json-ld, this
# requires that we define contexts for this. Which
# we don't (yet)
if ("iri" in subject and
":" in subject["iri"] and
"://" not in subject["iri"]):
short = subject["iri"].split(":", 1)[1]
if short in shortened:
self.log.warning(
"Cannot shorten IRI %s -> %s, already defined (%s)" %
(subject["iri"], short, shortened[short]))
del subject["iri"] # skips adding this to topics
else:
shortened[short] = subject["iri"]
subject["iri"] = short
if "iri" in subject and subject["iri"]:
topics.append(subject)
# make sure the triples are in a predictable order, so we can
# compare on the JSON level for testing
out['topic'] = sorted(topics, key=lambda x: x[idfld])
out['iri'] = rooturi
if '@context' in indata:
out['@context'] = indata['@context']
return out
def _get_json_context(self):
data = {}
# step 1: define all prefixes
for repo in self.repos:
for (prefix, ns) in repo.ns.items():
if prefix in data:
assert data[prefix] == str(
ns), "Conflicting URIs for prefix %s" % prefix
else:
data[prefix] = str(ns)
# foaf and rdfs must always be defined prefixes
data["foaf"] = "http://xmlns.com/foaf/0.1/"
data["rdfs"] = "http://www.w3.org/2000/01/rdf-schema#"
# the legacy api client expects some terms to be available using
# shortened forms (eg 'label' instead of 'rdfs:label'), so we must
# define them in our context
if self.config.legacyapi:
data['iri'] = "@id"
data['type'] = "@type"
data['label'] = 'rdfs:label'
data['name'] = 'foaf:name'
data['altLabel'] = 'skos:altLabel'
# data["@language"] = "en" # how to set this? majority vote of
# repos / documents? note that it's
# only a default.
return data
def _get_term_graph(self, graphuri):
    """Build a graph describing the terms of the vocabularies in use.

    For every namespace prefix of every repo (except rdf, rdfs and
    owl), the resource ``vocab/<prefix>.ttl`` is loaded if the repo's
    resource loader has it. Each non-blank subject that has a
    rdf:type, rdfs:label or rdfs:comment in the loaded data is added
    as a foaf:topic of the root, together with those triples.

    :param graphuri: The URI of the root node (typed as foaf:Document)
    :returns: The resulting graph
    """
    # produce a rdf graph of the terms (classes and properties) in
    # the vocabs we're using. This should preferably entail
    # loading the vocabularies (stored as RDF/OWL documents), and
    # expressing all the things that are owl:*Property, owl:Class,
    # rdf:Property and rdf:Class. As an intermediate step, we
    # could have preprocessed rdf graphs (stored in
    # res/vocab/dcterms.ttl, res/vocab/bibo.ttl etc) derived from the
    # vocabularies and pull them in like we pull in namespaces in
    # self.ns The rdf graph should be rooted in an url (eg
    # http://localhost:8080/var/terms, and then have each term as
    # a foaf:topic. Each term should be described with its
    # rdf:type, rdfs:label (most important!) and possibly
    # rdfs:comment
    root = URIRef(graphuri)
    g = Graph()
    g.add((root, RDF.type, FOAF.Document))
    bigg = Graph()
    loaded = set()  # vocabulary files already parsed into bigg
    for repo in self.repos:
        for prefix, ns in repo.ns.items():
            if prefix in ("rdf", "rdfs", "owl"):
                continue
            g.bind(prefix, ns)
            resourcename = "vocab/%s.ttl" % prefix
            if not repo.resourceloader.exists(resourcename):
                continue
            ontopath = repo.resourceloader.filename(resourcename)
            if ontopath in loaded:
                continue
            self.log.debug("Loading vocabulary %s" % ontopath)
            with open(ontopath) as onto:
                bigg.parse(onto, format="turtle")
            loaded.add(ontopath)
    g.bind("foaf", "http://xmlns.com/foaf/0.1/")

    wanted = (RDF.type, RDFS.label, RDFS.comment)
    for (s, p, o) in bigg:
        if p not in wanted:
            continue
        if isinstance(s, BNode):  # occurs in the def of foaf:member
            continue
        g.add((root, FOAF.topic, s))  # unless we've already added it?
        if isinstance(o, Literal):  # remove language typing info
            o = Literal(str(o))
        g.add((s, p, o))  # control duplicates somehow
    return g
def _get_common_graph(self, graphuri):
    """Build a graph naming the entities that the repos' data mentions.

    For every class in the MRO of every repo that has an ``alias``,
    the resource ``extra/<alias>.ttl`` is loaded if the repo's
    resource loader has it. Each subject with a foaf:name,
    skos:prefLabel, skos:altLabel or bibo:identifier in the loaded
    data is added as a foaf:topic of the root, together with those
    triples (as plain literals) and its rdf:type, if it has one.

    :param graphuri: The URI of the root node (typed as foaf:Document)
    :returns: The resulting graph
    """
    # create a graph with foaf:names for all entities (publishers,
    # publication series etc) that our data mentions.
    root = URIRef(graphuri)
    g = Graph()
    g.bind("skos", SKOS)
    g.bind("foaf", FOAF)
    g.add((root, RDF.type, FOAF.Document))
    paths = set()  # data files already parsed into bigg
    bigg = Graph()
    for repo in self.repos:
        for cls in inspect.getmro(repo.__class__):
            if not hasattr(cls, "alias"):
                continue
            resourcename = "extra/%s.ttl" % cls.alias
            if not repo.resourceloader.exists(resourcename):
                continue
            commonpath = repo.resourceloader.filename(resourcename)
            if commonpath in paths:
                continue
            self.log.debug("loading data %s" % commonpath)
            with open(commonpath) as common:
                bigg.parse(common, format="turtle")
            paths.add(commonpath)
    naming = (FOAF.name, SKOS.prefLabel, SKOS.altLabel, BIBO.identifier)
    for (s, p, o) in bigg:
        if p not in naming:
            continue
        g.add((root, FOAF.topic, s))
        # strip any typing/langtagging (because of reasons)
        if isinstance(o, Literal):
            o = Literal(str(o))
        g.add((s, p, o))
        # try to find a type -- bigg.value() yields None when the
        # subject has no rdf:type, and None must not be added as the
        # object of a triple
        rdftype = bigg.value(s, RDF.type)
        if rdftype is not None:
            g.add((s, RDF.type, rdftype))
    return g
def _filepath_to_urlpath(self, path, keep_segments=2):
"""
:param path: the full or relative filepath to transform into a urlpath
:param keep_segments: the number of directory segments to keep (the ending filename is always kept)
"""
# data/repo/rsrc/js/main.js, 3 -> repo/rsrc/js/main.js
# /var/folders/tmp4q6b1g/rsrc/resources.xml, 1 -> rsrc/resources.xml
# C:\docume~1\owner\locals~1\temp\tmpgbyuk7\rsrc\css\test.css, 2 - rsrc/css/test.css
path = path.replace(os.sep, "/")
urlpath = "/".join(path.split("/")[-(keep_segments + 1):])
# print("_filepath_to_urlpath (%s): %s -> %s" % (keep_segments, path, urlpath))
return urlpath