Source code for stardog.content

"""Content that can be loaded into Stardog.
"""
import contextlib
import os

import requests

from . import content_types as content_types


class Content:
    """Common base class for the content types defined in this module.

    It carries no state or behaviour of its own; the concrete content
    classes derive from it.
    """


class Raw(Content):
    """Content supplied directly by the caller."""

    def __init__(self, content, content_type=None, content_encoding=None, name=None):
        """Wraps an in-memory object as content.

        Args:
          content (obj): Object representing the content (e.g., str, file)
          content_type (str, optional): Content type.
            When omitted, the value guessed from ``name`` is used
          content_encoding (str, optional): Content encoding.
            When omitted, the value guessed from ``name`` is used
          name (str, optional): Object name

        Examples:
          >>> Raw(':luke a :Human', 'text/turtle', name='data.ttl')
          >>> Raw(':βüãäoñr̈ a :Employee .'.encode('utf-8'), 'text/turtle')

        """
        # The guess is always computed; explicit arguments simply win over it.
        guessed_encoding, guessed_type = content_types.guess_rdf_format(name)

        self.raw = content
        self.name = name
        self.content_type = content_type or guessed_type
        self.content_encoding = content_encoding or guessed_encoding

    @contextlib.contextmanager
    def data(self):
        """Context manager yielding the wrapped content unchanged."""
        yield self.raw


class File(Content):
    """File-based content."""

    def __init__(
        self, file=None, content_type=None, content_encoding=None, name=None, fname=None
    ):
        """Initializes a File object.

        Args:
          file (str): Filename
          content_type (str, optional): Content type.
            It will be automatically detected from the filename
          content_encoding (str, optional): Content encoding.
            It will be automatically detected from the filename
          name (str, optional): Object name.
            Defaults to the base name of the file
          fname (str, optional): Backward-compatible alias for ``file``.
            When given, it takes precedence over ``file``

        Raises:
          AssertionError: If neither ``file`` nor ``fname`` is provided

        Examples:
          >>> File('data.ttl')
          >>> File('data.doc', 'application/msword')
        """

        # ``file`` is the preferred parameter name: IDEs such as PyCharm offer
        # a file picker for it. ``fname`` is still honoured so that existing
        # callers using ``fname=`` keep working.
        if fname:
            file = fname

        # Raised explicitly instead of via ``assert`` so the check still runs
        # under ``python -O``; the exception type and message are unchanged.
        if not file:
            raise AssertionError("Parameter file is required")

        self.fname = file
        (c_enc, c_type) = content_types.guess_rdf_format(file)
        self.content_type = content_type if content_type else c_type
        self.content_encoding = content_encoding if content_encoding else c_enc
        self.name = name if name else os.path.basename(file)

    @contextlib.contextmanager
    def data(self):
        """Context manager yielding the file opened in binary read mode.

        The file is closed when the context exits.
        """
        with open(self.fname, "rb") as f:
            yield f


class MappingRaw(Content):
    """User-defined Mapping."""

    def __init__(self, content, syntax=None, name=None):
        """Initializes a MappingRaw object.

        Args:
          content (str): Mapping in raw form
          syntax (str, optional): Whether it is r2rml or sms type.
            If not provided, it is detected from ``name`` when a name is
            given, otherwise from the content itself
          name (str, optional): Object name

        Examples:
          >>> MappingRaw('''MAPPING
          ... FROM SQL {
          ...   SELECT *
          ...   FROM `benchmark`.`person`
          ... }
          ... TO {
          ...   ?subject rdf:type :person
          ... } WHERE {
          ...   BIND(template("http://api.stardog.com/person/nr={nr}") AS ?subject)
          ... }''')
        """
        self.raw = content
        self.name = name

        # Try the name first; fall back to inspecting the content when there
        # is no name or the name gives no answer.
        detected = content_types.guess_mapping_format(name) if name else None
        if detected is None:
            detected = content_types.guess_mapping_format_from_content(content)

        # An explicit syntax always wins over the detected one.
        self.syntax = syntax or detected

    @contextlib.contextmanager
    def data(self):
        """Context manager yielding the raw mapping unchanged."""
        yield self.raw


class MappingFile(Content):
    """File-based mapping."""

    def __init__(self, file: str, syntax=None, name=None):
        """Initializes a MappingFile object.

        Args:
          file (str): Filename
          syntax (str, optional): Whether it is r2rml or sms type.
            When omitted, it is guessed from the filename
          name (str, optional): Object name.
            Defaults to the base name of the file

        Examples:
          >>> MappingFile('data.sms')
          >>> MappingFile('data.sms2')
          >>> MappingFile('data.rq')
          >>> MappingFile('data.r2rml')
        """
        # Only consult the filename when the caller did not supply a value.
        if not syntax:
            syntax = content_types.guess_mapping_format(file)
        if not name:
            name = os.path.basename(file)

        self.fname = file
        self.syntax = syntax
        self.name = name

    @contextlib.contextmanager
    def data(self):
        """Context manager yielding the file opened in binary read mode.

        The file is closed when the context exits.
        """
        with open(self.fname, "rb") as handle:
            yield handle


class ImportRaw(Content):
    """User-defined content for import."""

    def __init__(
        self,
        content,
        input_type=None,
        separator=None,
        content_type=None,
        content_encoding=None,
        name=None,
    ):
        """Initializes an ImportRaw object.

        Args:
          content (obj): Object representing the content (e.g., str, file)
          input_type (str): DELIMITED or JSON
          separator (str): Required if it's DELIMITED content
          content_type (str, optional): Content type
          content_encoding (str, optional): Content encoding
          name (str, optional): Object name

          If name is given as a pseudo filename (e.g. data.csv, data.tsv or
          data.json), most of the required parameters are auto-detected from
          it; otherwise you must specify them.

        Examples:
          >>> ImportRaw('a,b,c',  name='data.csv')
          >>> ImportRaw('a\\tb\\tc', name='data.tsv')
          >>> ImportRaw({'foo':'bar'}, name='data.json')

        """
        self.raw = content
        self.name = name

        guessed = content_types.guess_import_format(name)
        guessed_encoding, guessed_type, guessed_input_type, guessed_separator = guessed

        # Explicit arguments take priority over whatever was guessed.
        self.content_type = content_type or guessed_type
        self.content_encoding = content_encoding or guessed_encoding
        self.input_type = input_type or guessed_input_type
        self.separator = separator or guessed_separator

    @contextlib.contextmanager
    def data(self):
        """Context manager yielding the wrapped content unchanged."""
        yield self.raw


class ImportFile(Content):
    """File-based content for Delimited and JSON file."""

    def __init__(
        self,
        file,
        input_type=None,
        content_type=None,
        content_encoding=None,
        separator=None,
        name=None,
    ):
        """Initializes an ImportFile object.

        Any of ``input_type``, ``separator``, ``content_type`` and
        ``content_encoding`` that is omitted falls back to the value guessed
        from the filename.

        Args:
          file (str): Filename
          input_type (str): DELIMITED or JSON
          separator (str): Required if it's DELIMITED content
          content_type (str, optional): Content type
          content_encoding (str, optional): Content encoding
          name (str, optional): Object name.
            Defaults to the base name of the file

        Examples:
          >>> ImportFile('data.csv')
          >>> ImportFile('data.tsv')
          >>> ImportFile('data.txt','DELIMITED',"\\t" )
          >>> ImportFile('data.json')
        """
        self.fname = file

        guessed = content_types.guess_import_format(file)
        guessed_encoding, guessed_type, guessed_input_type, guessed_separator = guessed

        # Explicit arguments take priority over whatever was guessed.
        self.content_type = content_type or guessed_type
        self.content_encoding = content_encoding or guessed_encoding
        self.input_type = input_type or guessed_input_type
        self.separator = separator or guessed_separator

        self.name = name or os.path.basename(file)

    @contextlib.contextmanager
    def data(self):
        """Context manager yielding the file opened in binary read mode.

        The file is closed when the context exits.
        """
        with open(self.fname, "rb") as handle:
            yield handle


class URL(Content):
    """Url-based content."""

    def __init__(self, url, content_type=None, content_encoding=None, name=None):
        """Initializes a URL object.

        Args:
          url (str): Url
          content_type (str, optional): Content type.
              It will be automatically detected from the url
          content_encoding (str, optional): Content encoding.
              It will be automatically detected from the url
          name (str, optional): Object name.
              Defaults to the last path component of the url

        Examples:
          >>> URL('http://example.com/data.ttl')
          >>> URL('http://example.com/data.doc', 'application/msword')
        """
        self.url = url
        (c_enc, c_type) = content_types.guess_rdf_format(url)
        self.content_type = content_type if content_type else c_type
        self.content_encoding = content_encoding if content_encoding else c_enc
        self.name = name if name else os.path.basename(url)

    @contextlib.contextmanager
    def data(self):
        """Context manager that downloads the url and yields the response body.

        Raises:
          requests.HTTPError: If the server answers with a 4xx/5xx status
        """
        with requests.get(self.url, stream=True) as r:
            # Fail loudly on an error response instead of handing the body of
            # an error page to the caller as if it were the requested content.
            r.raise_for_status()
            yield r.content