Skip to content

Commit

Permalink
hotfix: teradata cast (#482)
Browse files Browse the repository at this point in the history
* swapping out teradata import

* fixing bad imports

* changing imports and trying cast to VARCHAR

* trying operator concat instead

* overriding ibis return type

* maybe this will work

* how about now

* how about now

* I should probably choose different commit messages

* progress

* progress

* I hate teradata

* I hate teradata
  • Loading branch information
renzokuken committed May 19, 2022
1 parent 8106523 commit 1fb7e8b
Show file tree
Hide file tree
Showing 3 changed files with 31 additions and 39 deletions.
3 changes: 0 additions & 3 deletions third_party/ibis/ibis_teradata/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,6 @@

from typing import Optional

import google.auth.credentials
import google.cloud.bigquery # noqa: F401, fail early if bigquery is missing

import ibis.common.exceptions as com
from client import TeradataClient # TODO make non local
from compiler import dialect # TODO make non local
Expand Down
61 changes: 27 additions & 34 deletions third_party/ibis/ibis_teradata/compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
from ibis.backends.base_sql.identifiers import base_identifiers

from ibis.common.exceptions import UnsupportedOperationError
from ibis_bigquery.datatypes import ibis_type_to_bigquery_type
from .datatypes import ibis_type_to_teradata_type
from ibis.backends.base_sql import fixed_arity, literal, reduction, unary
from ibis.backends.base_sql.compiler import (
BaseExprTranslator,
Expand Down Expand Up @@ -128,13 +128,6 @@ def _quote_identifier(self, name, quotechar='"', force=False):
return name


# Table-set formatter whose identifier quoting uses backticks.
# NOTE(review): the hunk header above shrinks this region from 13 to 6 lines,
# so this class appears to be what the commit deletes - confirm.
class BigQueryTableSetFormatter(BaseTableSetFormatter):
def _quote_identifier(self, name):
# A name that starts with a letter and contains only letters, digits and
# underscores is returned unquoted.
if re.match(r'^[A-Za-z][A-Za-z_0-9]*$', name):
return name
# Any other name is wrapped in backticks.
return '`{}`'.format(name)


# Subclass of ops.ValueOp that adds no members of its own; the UDF finder
# further down recognises UDF nodes with an isinstance check on this class.
class TeradataUDFNode(ops.ValueOp):
pass

Expand Down Expand Up @@ -256,7 +249,7 @@ def generate_setup_queries(
): # TODO validate if I need to override this function
queries = map(
partial(TeradataUDFDefinition, context=self.context),
lin.traverse(find_bigquery_udf, self.expr),
lin.traverse(find_teradata_udf, self.expr),
)

# UDFs are uniquely identified by the name of the Node subclass we
Expand Down Expand Up @@ -300,7 +293,7 @@ def _name_expr(formatted_expr, quoted_name):
return "{} AS {}".format(formatted_expr, quoted_name)


def find_bigquery_udf(expr):
def find_teradata_udf(expr):
if isinstance(expr.op(), TeradataUDFNode):
result = expr
else:
Expand All @@ -323,25 +316,25 @@ def extract_field_formatter(translator, expr):
return extract_field_formatter


# Dispatcher called by _cast. Implementations are registered on
# (compiled SQL string, source ibis type, target ibis type).
# The two assignments are the before/after sides of the rename in this diff view.
bigquery_cast = Dispatcher("bigquery_cast")
teradata_cast = Dispatcher("teradata_cast")


# Cast rule for Timestamp -> Integer: wraps the already-compiled argument in
# UNIX_MICROS(...) rather than emitting a CAST. `from_` and `to` are only used
# for dispatch and are not read in the body.
# NOTE(review): the UNIX_MICROS call is unchanged from the BigQuery-named
# version of this function - confirm that Teradata accepts it.
@bigquery_cast.register(str, dt.Timestamp, dt.Integer)
def bigquery_cast_timestamp_to_integer(compiled_arg, from_, to):
@teradata_cast.register(str, dt.Timestamp, dt.Integer)
def teradata_cast_timestamp_to_integer(compiled_arg, from_, to):
return "UNIX_MICROS({})".format(compiled_arg)


# Cast rule registered on the base dt.DataType for both source and target:
# converts the target ibis type to a SQL type name and emits
# CAST(<compiled arg> AS <sql type>). `from_` is not read in the body.
@bigquery_cast.register(str, dt.DataType, dt.DataType)
def bigquery_cast_generate(compiled_arg, from_, to):
sql_type = ibis_type_to_bigquery_type(to)
@teradata_cast.register(str, dt.DataType, dt.DataType)
def teradata_cast_generate(compiled_arg, from_, to):
# Single-argument call: no TypeTranslationContext is passed here.
sql_type = ibis_type_to_teradata_type(to)
return "CAST({} AS {})".format(compiled_arg, sql_type)


# Compile a cast expression: translate the operand to SQL, then pass that SQL
# string together with the operand's type and the target type to the cast
# dispatcher, which picks the matching rule.
def _cast(translator, expr):
op = expr.op()
# op.args unpacks to (operand expression, target type).
arg, target_type = op.args
arg_formatted = translator.translate(arg)
# The two returns below are the before/after sides of the dispatcher rename
# in this diff view.
return bigquery_cast(arg_formatted, arg.type(), target_type)
return teradata_cast(arg_formatted, arg.type(), target_type)


def _struct_field(translator, expr):
Expand Down Expand Up @@ -375,7 +368,7 @@ def _string_find(translator, expr):


# Translate a regex pattern expression to SQL, prefixing literal patterns with "r".
def _translate_pattern(translator, pattern):
# add 'r' to string literals to indicate to BigQuery this is a raw string
# add 'r' to string literals to indicate to Teradata this is a raw string
# "r" * <bool> evaluates to "r" when the pattern's op is an ops.Literal and to
# "" otherwise, so non-literal patterns are emitted without the prefix.
# NOTE(review): only the comment changed in this hunk; the r'...' prefix itself
# is carried over from the BigQuery wording - confirm Teradata understands it.
return "r" * isinstance(pattern.op(), ops.Literal) + translator.translate(pattern)


Expand Down Expand Up @@ -405,11 +398,11 @@ def _regex_replace(translator, expr):


# Compile string concatenation: each element of expr.op().arg is translated to SQL.
# The first return joins them inside CONCAT(...); the second joins them with the
# || operator (no surrounding spaces). They are the before/after sides of this
# change in the diff view.
def _string_concat(translator, expr):
return "CONCAT({})".format(", ".join(map(translator.translate, expr.op().arg)))
return "||".join(map(translator.translate, expr.op().arg))

# Compile a string join. The returns build the same expression as _string_concat.
def _string_join(translator, expr):
# NOTE(review): `sep` and `args` are unpacked but never used below, so the
# separator does not appear in the generated SQL - confirm whether the
# separator should be interleaved between the operands.
sep, args = expr.op().args
return "CONCAT({})".format(", ".join(map(translator.translate, expr.op().arg)))
return "||".join(map(translator.translate, expr.op().arg))

# def _string_join(translator, expr):
# sep, args = expr.op().args
Expand Down Expand Up @@ -484,7 +477,7 @@ def _arbitrary(translator, expr):

if how not in (None, "first"):
raise UnsupportedOperationError(
"{!r} value not supported for arbitrary in BigQuery".format(how)
"{!r} value not supported for arbitrary in Teradata".format(how)
)

return "ANY_VALUE({})".format(translator.translate(arg))
Expand Down Expand Up @@ -517,7 +510,7 @@ def truncator(translator, expr):
valid_unit = units.get(unit)
if valid_unit is None:
raise UnsupportedOperationError(
"BigQuery does not support truncating {} values to unit "
"Teradata does not support truncating {} values to unit "
"{!r}".format(arg.type(), unit)
)
return "{}_TRUNC({}, {})".format(kind, trans_arg, valid_unit)
Expand All @@ -533,7 +526,7 @@ def _formatter(translator, expr):
unit = offset.type().unit
if unit not in units:
raise UnsupportedOperationError(
"BigQuery does not allow binary operation "
"Teradata does not allow binary operation "
"{} with INTERVAL offset {}".format(func, unit)
)
formatted_arg = translator.translate(arg)
Expand Down Expand Up @@ -584,7 +577,7 @@ def _formatter(translator, expr):
ops.Sign: unary("SIGN"),
ops.Modulus: fixed_arity("MOD", 2),
ops.Date: unary("DATE"),
# BigQuery doesn't have these operations built in.
# Teradata doesn't have these operations built in.
# ops.ArrayRepeat: _array_repeat,
# ops.ArraySlice: _array_slice,
ops.Literal: _literal,
Expand Down Expand Up @@ -618,20 +611,20 @@ def _formatter(translator, expr):


# Compile ops.DayOfWeekIndex to MOD(EXTRACT(DAYOFWEEK FROM <arg>) + 5, 7).
# NOTE(review): presumably the "+ 5, mod 7" shift converts a 1-based,
# Sunday-first DAYOFWEEK into a 0-based, Monday-first index - verify, and
# confirm that EXTRACT(DAYOFWEEK ...) is valid on Teradata, since the SQL text
# is unchanged by the rename.
@compiles(ops.DayOfWeekIndex)
def bigquery_day_of_week_index(t, e):
def teradata_day_of_week_index(t, e):
# The operation's first argument is the date/timestamp expression.
arg = e.op().args[0]
arg_formatted = t.translate(arg)
return "MOD(EXTRACT(DAYOFWEEK FROM {}) + 5, 7)".format(arg_formatted)


# Rewrite rule for ops.DayOfWeekName: replaces the expression with a strftime
# call on its first argument using the "%A" format, so the name is produced by
# whatever compiles strftime rather than by a dedicated rule.
@rewrites(ops.DayOfWeekName)
def bigquery_day_of_week_name(e):
def teradata_day_of_week_name(e):
arg = e.op().args[0]
return arg.strftime("%A")


# Compile ops.Divide to IEEE_DIVIDE(<left>, <right>) using the translated operands.
# NOTE(review): IEEE_DIVIDE is unchanged from the BigQuery-named version of this
# function - confirm that Teradata provides it.
@compiles(ops.Divide)
def bigquery_compiles_divide(t, e):
def teradata_compiles_divide(t, e):
# The format string has two slots, so the op is expected to have exactly two args.
return "IEEE_DIVIDE({}, {})".format(*map(t.translate, e.op().args))


Expand Down Expand Up @@ -710,9 +703,9 @@ def compiles_timestamp_from_unix(t, e):

# Compile ops.Floor to CAST(FLOOR(<arg>) AS <type>), where <type> is the SQL
# type name derived from the expression's own result type.
@compiles(ops.Floor)
def compiles_floor(t, e):
# The two assignments below are the before/after sides of the rename in this
# diff view; each has a matching return further down.
bigquery_type = ibis_type_to_bigquery_type(e.type())
teradata_type = ibis_type_to_teradata_type(e.type())
arg = e.op().arg
return "CAST(FLOOR({}) AS {})".format(t.translate(arg), bigquery_type)
return "CAST(FLOOR({}) AS {})".format(t.translate(arg), teradata_type)


@compiles(ops.CMSMedian)
Expand Down Expand Up @@ -752,27 +745,27 @@ def compiles_covar(translator, expr):
@rewrites(ops.All)
@rewrites(ops.NotAny)
@rewrites(ops.NotAll)
def bigquery_any_all_no_op(expr):
def teradata_any_all_no_op(expr):
return expr


# Compile ops.Any to LOGICAL_OR(<translated argument>).
# NOTE(review): LOGICAL_OR is unchanged from the BigQuery-named version of this
# function - confirm that Teradata supports it.
@compiles(ops.Any)
def bigquery_compile_any(translator, expr):
def teradata_compile_any(translator, expr):
# The format string has a single slot, so only one op argument is expected.
return "LOGICAL_OR({})".format(*map(translator.translate, expr.op().args))


# Compile ops.NotAny to LOGICAL_AND(NOT (<translated argument>)).
# NOTE(review): LOGICAL_AND is unchanged from the BigQuery-named version of this
# function - confirm that Teradata supports it.
@compiles(ops.NotAny)
def bigquery_compile_notany(translator, expr):
def teradata_compile_notany(translator, expr):
return "LOGICAL_AND(NOT ({}))".format(*map(translator.translate, expr.op().args))


# Compile ops.All to LOGICAL_AND(<translated argument>).
# NOTE(review): LOGICAL_AND is unchanged from the BigQuery-named version of this
# function - confirm that Teradata supports it.
@compiles(ops.All)
def bigquery_compile_all(translator, expr):
def teradata_compile_all(translator, expr):
return "LOGICAL_AND({})".format(*map(translator.translate, expr.op().args))


# Compile ops.NotAll to LOGICAL_OR(NOT (<translated argument>)).
# NOTE(review): LOGICAL_OR is unchanged from the BigQuery-named version of this
# function - confirm that Teradata supports it.
@compiles(ops.NotAll)
def bigquery_compile_notall(translator, expr):
def teradata_compile_notall(translator, expr):
return "LOGICAL_OR(NOT ({}))".format(*map(translator.translate, expr.op().args))


Expand Down
6 changes: 4 additions & 2 deletions third_party/ibis/ibis_teradata/datatypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,16 +120,18 @@ def to_ibis_from_SZ(cls, col_data, return_ibis_type=True):
# Conversion for a type given as a plain string with no context: parses the
# string into an ibis type with dt.dtype and re-dispatches on the result.
@ibis_type_to_teradata_type.register(str)
def trans_string_default(datatype):
return ibis_type_to_teradata_type(dt.dtype(datatype))



# Conversion registered for any dt.DataType passed without a context.
# The first return re-dispatches with a fresh TypeTranslationContext; it appears
# to be the pre-change line, with the commented copy and the constant return
# being the post-change lines in this diff view.
@ibis_type_to_teradata_type.register(dt.DataType)
def trans_default(t):
return ibis_type_to_teradata_type(t, TypeTranslationContext())
# return ibis_type_to_teradata_type(t, TypeTranslationContext())
# NOTE(review): this constant ignores `t`, so any type that reaches this rule
# maps to VARCHAR(255). Single-argument callers such as the generic CAST rule
# and the FLOOR compiler in compiler.py would emit "AS VARCHAR(255)" unless a
# more specific one-argument rule is registered elsewhere - confirm this is
# intended to outlive the hotfix.
return "VARCHAR(255)"


# Conversion for a type given as a string together with a TypeTranslationContext:
# parses the string with dt.dtype and re-dispatches with the same context.
@ibis_type_to_teradata_type.register(str, TypeTranslationContext)
def trans_string_context(datatype, context):
return ibis_type_to_teradata_type(dt.dtype(datatype), context)
# NOTE(review): this line follows an unconditional return, so it can never
# execute. Either remove it or replace the return above if "VARCHAR" is the
# intended result.
return "VARCHAR"


@ibis_type_to_teradata_type.register(dt.Floating, TypeTranslationContext)
Expand Down

0 comments on commit 1fb7e8b

Please sign in to comment.