"""Content that can be loaded into Stardog.
"""
import contextlib
import os
from typing import Optional
import requests
from . import content_types as content_types
class Content:
    """Content base class.

    Common parent of every content type in this module. It carries no state
    or behaviour of its own; the subclasses defined alongside it each expose
    a ``data()`` context manager that yields the payload.
    """
class Raw(Content):
    """User-defined content."""

    def __init__(
        self,
        content: object,
        content_type: Optional[str] = None,
        content_encoding: Optional[str] = None,
        name: Optional[str] = None,
    ) -> None:
        """Initializes a Raw object.

        :param content: Object representing the content (e.g., str, file)
        :param content_type: Content type. When omitted, the value guessed
            from ``name`` is used.
        :param content_encoding: Content encoding. When omitted, the value
            guessed from ``name`` is used.
        :param name: Object name

        Examples:
            >>> Raw(':luke a :Human', 'text/turtle', name='data.ttl')
            >>> Raw(':βüãäoñr̈ a :Employee .'.encode('utf-8'), 'text/turtle')
        """
        self.raw = content
        self.name = name

        # Explicitly passed values win; the guess only fills in what the
        # caller left out.
        (c_enc, c_type) = content_types.guess_rdf_format(name)
        self.content_type = content_type or c_type
        self.content_encoding = content_encoding or c_enc

    @contextlib.contextmanager
    def data(self):
        """Context manager yielding the content object exactly as it was given."""
        yield self.raw
class File(Content):
    """File-based content."""

    def __init__(
        self,
        file: Optional[str] = None,
        content_type: Optional[str] = None,
        content_encoding: Optional[str] = None,
        name: Optional[str] = None,
        fname: Optional[str] = None,
    ) -> None:
        """Initializes a File object.

        :param file: the filename/path of the file
        :param content_type: Content type.
            It will be automatically detected from the filename
        :param content_encoding: Content encoding.
            It will be automatically detected from the filename
        :param name: Name of the file object.
            It will be automatically detected from the filename
        :param fname: backward compatible parameter for ``file``
        :raises AssertionError: if neither ``file`` nor ``fname`` is provided

        Examples:
            >>> File('data.ttl')
            >>> File('data.doc', 'application/msword')
        """
        # ``file`` has a special meaning in IDEs such as PyCharm, where it
        # brings up a file picker that helps locate the file. ``fname`` is
        # still accepted so callers that pass ``fname=`` keep working; when
        # both are given, ``fname`` takes precedence.
        if fname:
            file = fname

        # Raised explicitly instead of via ``assert`` so the check is not
        # stripped under ``python -O``; the exception type is kept so that
        # existing callers observe the same error.
        if not file:
            raise AssertionError("Parameter file is required")

        self.fname = file

        # Explicitly passed values win; the guess only fills in what the
        # caller left out.
        (c_enc, c_type) = content_types.guess_rdf_format(file)
        self.content_type = content_type or c_type
        self.content_encoding = content_encoding or c_enc
        self.name = name or os.path.basename(file)

    @contextlib.contextmanager
    def data(self):
        """Context manager yielding the file opened in binary read mode.

        The file is closed when the ``with`` block exits.
        """
        with open(self.fname, "rb") as f:
            yield f
class MappingRaw(Content):
    """User-defined Mapping."""

    def __init__(
        self, content: str, syntax: Optional[str] = None, name: Optional[str] = None
    ) -> None:
        """Initializes a MappingRaw object.

        :param content: the actual mapping content (e.g. ``'MAPPING\\n FROM SQL ...'``)
        :param syntax: The mapping syntax (``'STARDOG'``, ``'R2RML'``, or ``'SMS2'``)
            If not provided, it will try to detect it from ``name`` if provided, otherwise from the content itself
        :param name: name of object

        Examples:
            >>> mapping = '''
            MAPPING
            FROM SQL {
              SELECT *
              FROM `benchmark`.`person`
            }
            TO {
              ?subject rdf:type :person
            } WHERE {
              BIND(template("http://api.stardog.com/person/nr={nr}") AS ?subject)
            }
            '''
            >>> MappingRaw(mapping)
        """
        self.raw = content
        self.name = name

        # Detection order: the name first (when one was given), then the
        # content itself if the name gave no answer.
        c_syntax = None
        if name:
            c_syntax = content_types.guess_mapping_format(name)
        if c_syntax is None:
            c_syntax = content_types.guess_mapping_format_from_content(content)

        # An explicitly passed syntax always overrides the detected one.
        self.syntax = syntax or c_syntax

    @contextlib.contextmanager
    def data(self):
        """Context manager yielding the mapping content exactly as it was given."""
        yield self.raw
class MappingFile(Content):
    """File-based mapping."""

    def __init__(
        self, file: str, syntax: Optional[str] = None, name: Optional[str] = None
    ) -> None:
        """Initializes a MappingFile object.

        :param file: the filename/path of the file
        :param syntax: The mapping syntax (``'STARDOG'``, ``'R2RML'``, or ``'SMS2'``)
            If not provided, it will try to detect it from the ``file``'s extension.
        :param name: the name of the object. If not provided, will fall back to the basename of the ``file``.

        Examples:
            >>> MappingFile('data.sms')
            >>> MappingFile('data.sms2')
            >>> MappingFile('data.rq')
            >>> MappingFile('data.r2rml')
        """
        self.fname = file
        # The guess is only consulted when no syntax was passed explicitly.
        self.syntax = syntax if syntax else content_types.guess_mapping_format(file)
        self.name = name or os.path.basename(file)

    @contextlib.contextmanager
    def data(self):
        """Context manager yielding the file opened in binary read mode.

        The file is closed when the ``with`` block exits.
        """
        with open(self.fname, "rb") as f:
            yield f
class ImportRaw(Content):
    """User-defined content."""

    def __init__(
        self,
        content: object,
        input_type: Optional[str] = None,
        separator: Optional[str] = None,
        content_type: Optional[str] = None,
        content_encoding: Optional[str] = None,
        name: Optional[str] = None,
    ) -> None:
        """Initializes an ImportRaw object.

        :param content: Object representing the content (e.g., str, file)
        :param input_type: ``'DELIMITED'`` or ``'JSON'``
        :param separator: Required if ``input_type`` is ``'DELIMITED'``. Use ``','`` for a CSV. Use ``\\\\t`` for a TSV.
        :param content_type: Content type
        :param content_encoding: Content encoding
        :param name: Object name

        .. note::
            if ``name`` is provided like a pseudo filename (i.e. ``'data.csv'``, ``'data.tsv'``, or ``'data.json'``), it will auto-detect most
            required parameters (``input_type``, ``separator``, ``content_type``, ``content_encoding``) - otherwise you must specify them.

        Examples:
            >>> ImportRaw('a,b,c', name='data.csv')
            >>> ImportRaw('a\\tb\\tc', name='data.tsv')
            >>> ImportRaw({'foo':'bar'}, name='data.json')
        """
        self.raw = content
        self.name = name

        # Explicitly passed values win; the guess only fills in what the
        # caller left out.
        (c_enc, c_type, c_input_type, c_separator) = content_types.guess_import_format(
            name
        )
        self.content_type = content_type or c_type
        self.content_encoding = content_encoding or c_enc
        self.input_type = input_type or c_input_type
        self.separator = separator or c_separator

    @contextlib.contextmanager
    def data(self):
        """Context manager yielding the content object exactly as it was given."""
        yield self.raw
class ImportFile(Content):
    """File-based content for Delimited and JSON file."""

    def __init__(
        self,
        file: str,
        input_type: Optional[str] = None,
        content_type: Optional[str] = None,
        content_encoding: Optional[str] = None,
        separator: Optional[str] = None,
        name: Optional[str] = None,
    ) -> None:
        """Initializes an ImportFile object.

        :param file: filename/path of the file
        :param input_type: ``'DELIMITED'`` or ``'JSON'``
        :param content_type: Content type
        :param content_encoding: Content encoding
        :param separator: Required if ``input_type`` is ``'DELIMITED'``. Use ``','`` for a CSV. Use ``\\\\t`` for a TSV.
        :param name: Object name.
            It will be automatically detected from the ``file`` if omitted.

        .. note::
            If ``file`` has a recognized extension (i.e. ``'data.csv'``, ``'data.tsv'``, or ``'data.json'``), it will auto-detect most
            required parameters (``input_type``, ``separator``, ``content_type``, ``content_encoding``) - otherwise you must specify them.

        Examples:
            >>> ImportFile('data.csv')
            >>> ImportFile('data.tsv')
            >>> ImportFile('data.txt','DELIMITED',"\\\\t" )
            >>> ImportFile('data.json')
        """
        self.fname = file

        # Explicitly passed values win; the guess only fills in what the
        # caller left out.
        (c_enc, c_type, c_input_type, c_separator) = content_types.guess_import_format(
            file
        )
        self.content_type = content_type or c_type
        self.content_encoding = content_encoding or c_enc
        self.input_type = input_type or c_input_type
        self.separator = separator or c_separator
        self.name = name or os.path.basename(file)

    @contextlib.contextmanager
    def data(self):
        """Context manager yielding the file opened in binary read mode.

        The file is closed when the ``with`` block exits.
        """
        with open(self.fname, "rb") as f:
            yield f
class URL(Content):
    """Url-based content."""

    def __init__(
        self,
        url: str,
        content_type: Optional[str] = None,
        content_encoding: Optional[str] = None,
        name: Optional[str] = None,
    ) -> None:
        """Initializes a URL object.

        :param url: URL to the content
        :param content_type: Content type.
            It will be automatically detected from the ``url`` if not provided.
        :param content_encoding: Content encoding.
            It will be automatically detected from the ``url`` if not provided.
        :param name: Object name.
            It will be automatically detected from the ``url`` if not provided.

        Examples:
            >>> URL('http://example.com/data.ttl')
            >>> URL('http://example.com/data.doc', 'application/msword')
        """
        self.url = url

        # Explicitly passed values win; the guess only fills in what the
        # caller left out.
        (c_enc, c_type) = content_types.guess_rdf_format(url)
        self.content_type = content_type or c_type
        self.content_encoding = content_encoding or c_enc
        # The default name is whatever follows the last "/" of the URL.
        self.name = name or os.path.basename(url)

    @contextlib.contextmanager
    def data(self):
        """Context manager that fetches the URL and yields the response body.

        The response is closed when the ``with`` block exits.
        """
        # NOTE(review): the HTTP status is not checked here, so the body of
        # an error response would be yielded like any other content --
        # confirm whether ``raise_for_status()`` is wanted.
        with requests.get(self.url, stream=True) as r:
            yield r.content