Source code for stardog.content

"""Content that can be loaded into Stardog.
"""
import contextlib
import os

import requests

from . import content_types as content_types


class Content:
    """Common base class for content that can be loaded into Stardog.

    It defines no attributes or methods of its own; it only gives the
    concrete content classes a shared ancestor.
    """
class Raw(Content):
    """Content handed over directly by the caller (not read from a file or URL)."""

    def __init__(self, content, content_type=None, content_encoding=None, name=None):
        """Wrap a caller-supplied payload.

        The content type and encoding are guessed from ``name`` via
        ``content_types.guess_rdf_format``; an explicitly passed (truthy)
        ``content_type`` / ``content_encoding`` takes precedence over the guess.

        Args:
            content (obj): Object representing the content (e.g., str, file)
            content_type (str, optional): Content type
            content_encoding (str, optional): Content encoding
            name (str, optional): Object name

        Examples:
            >>> Raw(':luke a :Human', 'text/turtle', name='data.ttl')
            >>> Raw(':βüãäoñr̈ a :Employee .'.encode('utf-8'), 'text/turtle')
        """
        self.raw = content
        self.name = name

        guessed_encoding, guessed_type = content_types.guess_rdf_format(name)
        self.content_type = content_type or guessed_type
        self.content_encoding = content_encoding or guessed_encoding

    @contextlib.contextmanager
    def data(self):
        """Context manager that yields the wrapped content unchanged."""
        yield self.raw
class File(Content):
    """File-based content."""

    def __init__(
        self, file=None, content_type=None, content_encoding=None, name=None, fname=None
    ):
        """Initializes a File object.

        Args:
            file (str): Filename. Required unless ``fname`` is given.
            content_type (str, optional): Content type.
                It will be automatically detected from the filename
            content_encoding (str, optional): Content encoding.
                It will be automatically detected from the filename
            name (str, optional): Object name.
                Defaults to the base name of the file
            fname (str, optional): Backward-compatible alias for ``file``.
                When both are given, ``fname`` takes precedence.

        Raises:
            AssertionError: If neither ``file`` nor ``fname`` is provided.

        Examples:
            >>> File('data.ttl')
            >>> File('data.doc', 'application/msword')
        """
        # "file" has a special meaning in IDEs such as PyCharm, which show a
        # file picker for it. That helps users find the file, but "fname="
        # must keep working for existing callers.
        if fname:
            file = fname

        # Raised explicitly instead of using an ``assert`` statement so the
        # check is not stripped when Python runs with -O. The exception type
        # and message are the ones callers have always seen.
        if not file:
            raise AssertionError("Parameter file is required")

        self.fname = file

        (c_enc, c_type) = content_types.guess_rdf_format(file)
        self.content_type = content_type if content_type else c_type
        self.content_encoding = content_encoding if content_encoding else c_enc
        self.name = name if name else os.path.basename(file)

    @contextlib.contextmanager
    def data(self):
        """Context manager that opens the file in binary mode and yields the handle.

        The file is closed when the context exits.
        """
        with open(self.fname, "rb") as f:
            yield f
class MappingRaw(Content):
    """User-defined mapping supplied directly by the caller."""

    def __init__(self, content, syntax=None, name=None):
        """Initializes a MappingRaw object.

        Args:
            content (str): Mapping in raw form
            syntax (str, optional): Whether it is r2rml or sms type.
                If not provided, it is detected from ``name`` when a name is
                given, otherwise from the content itself
            name (str, optional): Object name

        Examples:
            >>> MappingRaw('''MAPPING FROM SQL { SELECT * FROM `benchmark`.`person` } TO { ?subject rdf:type :person } WHERE { BIND(template("http://api.stardog.com/person/nr={nr}") AS ?subject) }''')
        """
        self.raw = content
        self.name = name

        # Detection order: the name first (if any), then the content itself
        # when the name gave no answer.
        detected = content_types.guess_mapping_format(name) if name else None
        if detected is None:
            detected = content_types.guess_mapping_format_from_content(content)

        # An explicit syntax wins over whatever was detected.
        self.syntax = syntax or detected

    @contextlib.contextmanager
    def data(self):
        """Context manager that yields the raw mapping unchanged."""
        yield self.raw
class MappingFile(Content):
    """File-based mapping."""

    def __init__(self, file: str, syntax=None, name=None):
        """Initializes a MappingFile object.

        Args:
            file (str): Filename
            syntax (str, optional): Whether it is r2rml or sms type.
                When omitted, it is taken from
                ``content_types.guess_mapping_format(file)``
            name (str, optional): Object name.
                Defaults to the base name of the file

        Examples:
            >>> MappingFile('data.sms')
            >>> MappingFile('data.sms2')
            >>> MappingFile('data.rq')
            >>> MappingFile('data.r2rml')
        """
        if not syntax:
            syntax = content_types.guess_mapping_format(file)
        if not name:
            name = os.path.basename(file)

        self.fname = file
        self.syntax = syntax
        self.name = name

    @contextlib.contextmanager
    def data(self):
        """Context manager that opens the mapping file in binary mode and yields it."""
        with open(self.fname, "rb") as handle:
            yield handle
class ImportRaw(Content):
    """User-defined delimited or JSON content supplied directly by the caller."""

    def __init__(
        self,
        content,
        input_type=None,
        separator=None,
        content_type=None,
        content_encoding=None,
        name=None,
    ):
        """Initializes an ImportRaw object.

        Args:
            content (obj): Object representing the content (e.g., str, file)
            input_type (str): DELIMITED or JSON
            separator (str): Required if it's DELIMITED CONTENT
            content_type (str, optional): Content type
            content_encoding (str, optional): Content encoding
            name (str, optional): Object name

        If name is provided like a pseudo filename, i.e. data.csv, data.tsv,
        or data.json, most of the required parameters are auto-detected from
        it; otherwise you must specify them. Explicitly passed (truthy)
        values always take precedence over detected ones.

        Examples:
            >>> ImportRaw('a,b,c', name='data.csv')
            >>> ImportRaw('a\tb\tc', name='data.tsv')
            >>> ImportRaw({'foo':'bar'}, name='data.json')
        """
        self.raw = content
        self.name = name

        guessed = content_types.guess_import_format(name)
        guessed_encoding, guessed_type, guessed_input_type, guessed_separator = guessed

        self.content_type = content_type or guessed_type
        self.content_encoding = content_encoding or guessed_encoding
        self.input_type = input_type or guessed_input_type
        self.separator = separator or guessed_separator

    @contextlib.contextmanager
    def data(self):
        """Context manager that yields the wrapped content unchanged."""
        yield self.raw
class ImportFile(Content):
    """File-based content for delimited and JSON files."""

    def __init__(
        self,
        file,
        input_type=None,
        content_type=None,
        content_encoding=None,
        separator=None,
        name=None,
    ):
        """Initializes an ImportFile object.

        Args:
            file (str): Filename
            input_type (str): DELIMITED or JSON
            content_type (str, optional): Content type
            content_encoding (str, optional): Content encoding
            separator (str): Required if it's DELIMITED CONTENT
            name (str, optional): Object name.
                Defaults to the base name of the file

        Anything not passed explicitly falls back to what
        ``content_types.guess_import_format(file)`` returns for the filename.

        Examples:
            >>> ImportFile('data.csv')
            >>> ImportFile('data.tsv')
            >>> ImportFile('data.txt','DELIMITED',"\t" )
            >>> ImportFile('data.json')
        """
        self.fname = file

        guessed = content_types.guess_import_format(file)
        guessed_encoding, guessed_type, guessed_input_type, guessed_separator = guessed

        self.content_type = content_type or guessed_type
        self.content_encoding = content_encoding or guessed_encoding
        self.input_type = input_type or guessed_input_type
        self.separator = separator or guessed_separator
        self.name = name or os.path.basename(file)

    @contextlib.contextmanager
    def data(self):
        """Context manager that opens the file in binary mode and yields the handle."""
        with open(self.fname, "rb") as handle:
            yield handle
class URL(Content):
    """Url-based content."""

    def __init__(self, url, content_type=None, content_encoding=None, name=None):
        """Initializes a URL object.

        Args:
            url (str): Url
            content_type (str, optional): Content type.
                It will be automatically detected from the url
            content_encoding (str, optional): Content encoding.
                It will be automatically detected from the url
            name (str, optional): Object name.
                Defaults to the last path component of the url

        Examples:
            >>> URL('http://example.com/data.ttl')
            >>> URL('http://example.com/data.doc', 'application/msword')
        """
        self.url = url

        (c_enc, c_type) = content_types.guess_rdf_format(url)
        self.content_type = content_type if content_type else c_type
        self.content_encoding = content_encoding if content_encoding else c_enc
        self.name = name if name else os.path.basename(url)

    @contextlib.contextmanager
    def data(self):
        """Context manager that downloads the url and yields the response body.

        Raises:
            requests.HTTPError: If the server responds with a 4xx/5xx status.
        """
        with requests.get(self.url, stream=True) as r:
            # Fail on HTTP errors rather than yielding an error page to the
            # caller as if it were the requested data.
            r.raise_for_status()
            yield r.content