ym_exec()

Read(571) Label: call, python, ymodel,

Description:

Execute a Python file to define a data file and the target variable for data scoring.

Syntax:

ym_exec(pyfile,data, jsonstr)

Note:

This external library function (see External Library Guide) executes a Python file. Parameter data is a table sequence or a table with column headers; it supplies the data set that contains the target variable. The target variable is specified through parameter jsonstr, a JSON string that names the column holding the desired target variable. Both data and jsonstr are passed to the Python file pyfile.
  The parameters passed in need to match those the receiving Python file expects, so that the latter can parse them correctly.

Parameter:

pyfile

A Python module file that implements the def apply(lists) interface; parameter lists is of the list data type

data

The data to be scored: a table sequence or a table with column headers

jsonstr

A JSON string like:
{target:0,n_components:3,deflation_mode:'regression',

  mode:'A',norm_y_weights:False,

  scale:False,algorithm:'nipals',

  max_iter:500,tol:0.000001,copy:True}

Here target specifies the column holding the target variable (for example, 0 for the first column)

Return value:

Table sequence

Example:

 

Example 1: To make an ordinary call from the Python file, use the following demo.py file:

 

interface def apply(lists)

# -*- coding: utf-8 -*-

 

def apply(lists):
    """Interface function of this module.

    Wraps the received list in a ``demo`` object and returns whatever its
    ``do_print()`` method produces.
    """
    return demo(lists).do_print()

 

class demo():
    """Minimal example class that echoes the values it receives.

    The constructor stores the list passed in; ``do_print()`` returns a new
    list holding the same items followed by the number of items.
    """

    def __init__(self, lists):
        # Keep a reference to the arguments handed over by the caller.
        self.lists = lists

    def count(self, num):
        """Return ``1 + 10 * num``."""
        return 1 + 10 * num

    def do_print(self):
        """Return a copy of the stored items with their count appended."""
        ls = list(self.lists)
        # The count is taken before it is appended, so it excludes itself.
        ls.append(len(ls))
        return ls

if __name__ == "__main__":
    # Stand-alone check: feed a sample list through apply() and show the result.
    sample = [1, 2, 3, 5, 6, 8]
    print('res={}'.format(apply(sample)))

 

The dfx script that calls the Python file:

 

 

A

B

1

=ym_env()

 

2

=date("1982/08/09")

 

3

="C:/Program Files/raqsoft/yimming/store/script/demo.py"

 

4

=file("D:/dev/workspace/pyjava/data2/train_t.csv").import@cqt()

 

5

=time@s(now())

=datetime@s(now())

6

=ym_exec(A3, A4, false, 12345, 10737418240, 123.45, decimal(1234567890123456), "aaa中国123", A2, A5, B5)

 

//The parameter can be any data type

7

=ym_exec(A4,  A1)

 

8

>ym_close(A2)

 

 

Example 2: To make the call using a Python module that implements an algorithm, use the pls_zh.py file below:

from scipy.linalg import pinv2

from sklearn.utils.validation import *

from sklearn.utils.extmath import *

from sklearn.cross_decomposition.pls_ import _nipals_twoblocks_inner_loop

import numpy as np

import pandas as pd

import demjson

 

class pls_zh():
    """Partial least squares regression fitted with the NIPALS inner loop.

    Parameters
    ----------
    n_components : int
        Number of components to extract.
    deflation_mode : str
        Stored on the instance; ``fit`` does not read it.
    mode : str
        Passed through to ``_nipals_twoblocks_inner_loop``.
    norm_y_weights : bool
        Passed to the inner loop; when true the Y scores are not divided by
        the squared norm of the Y weights.
    scale : bool
        Stored on the instance; ``center_scale_xy`` only centers the data
        and never scales it.
    algorithm : str
        Stored on the instance; ``fit`` always uses the NIPALS inner loop.
    max_iter : int
        Maximum number of inner-loop iterations.
    tol : float
        Convergence tolerance of the inner loop.
    copy : bool
        Forwarded to ``check_array`` in ``fit``.
    """

    def __init__(self,
                 n_components=15,
                 deflation_mode="regression",
                 mode="A",
                 norm_y_weights=False,
                 scale=False,
                 algorithm="nipals",
                 max_iter=500,
                 tol=1e-06,
                 copy=True):
        self.n_components = n_components
        self.deflation_mode = deflation_mode
        self.mode = mode
        self.norm_y_weights = norm_y_weights
        self.scale = scale
        self.algorithm = algorithm
        self.max_iter = max_iter
        self.tol = tol
        self.copy = copy
        print("n_components={};deflation_mode={};mode={};norm_y_weights={}; scale={}; algorithm={};max_iter={};tol={};copy={}".format(
            n_components, deflation_mode, mode, norm_y_weights, scale, algorithm, max_iter, tol, self.copy))

    def center_scale_xy(self, X, Y):
        """Center X and Y in place by subtracting their column means.

        Returns ``(X, Y, x_mean, y_mean, x_std, y_std)``.  No scaling is
        performed, so both standard-deviation vectors are all ones.
        """
        # center
        x_mean = X.mean(axis=0)
        X -= x_mean
        y_mean = Y.mean(axis=0)
        Y -= y_mean
        # scale (not applied: unit standard deviations are reported)
        x_std = np.ones(X.shape[1])
        y_std = np.ones(Y.shape[1])
        return X, Y, x_mean, y_mean, x_std, y_std

    def fit(self, X, Y):
        """Fit the model to predictors X and target(s) Y.

        Sets the fitted attributes (``x_scores_``, ``y_scores_``,
        ``x_weights_``, ``y_weights_``, ``x_loadings_``, ``y_loadings_``,
        ``x_rotations_``, ``y_rotations_``, ``coef_``, ``b``, ``n_iter_``).

        Returns
        -------
        tuple
            ``(coef_, y_mean_, x_mean_, x_std_, x_weights)`` where
            ``x_weights`` holds the X weights of the last component that
            was computed (``None`` if no component was computed).
        """
        # Imported locally so the method does not depend on ``warnings``
        # leaking in through a star import.
        import warnings

        check_consistent_length(X, Y)
        X = check_array(X, dtype=np.float64, copy=self.copy,
                        ensure_min_samples=2)
        Y = check_array(Y, dtype=np.float64,
                        copy=self.copy, ensure_2d=False)
        if Y.ndim == 1:
            Y = Y.reshape(-1, 1)

        n = X.shape[0]
        p = X.shape[1]
        q = Y.shape[1]
        X, Y, self.x_mean_, self.y_mean_, self.x_std_, self.y_std_ = (
            self.center_scale_xy(X, Y))
        # Residuals (deflated) matrices; these alias X and Y, which are
        # therefore deflated in place by the loop below.
        Xk = X
        Yk = Y
        # Results matrices
        self.x_scores_ = np.zeros((n, self.n_components))
        self.y_scores_ = np.zeros((n, self.n_components))
        self.x_weights_ = np.zeros((p, self.n_components))
        self.y_weights_ = np.zeros((q, self.n_components))
        self.x_loadings_ = np.zeros((p, self.n_components))
        self.y_loadings_ = np.zeros((q, self.n_components))
        self.n_iter_ = []

        # Defined up front so the return statement works even when the loop
        # body never runs.
        x_weights = None

        Y_eps = np.finfo(Yk.dtype).eps
        for k in range(self.n_components):
            # Zero out Y residual columns that are numerically constant.
            Yk_mask = np.all(np.abs(Yk) < 10 * Y_eps, axis=0)
            Yk[:, Yk_mask] = 0.0

            x_weights, y_weights, n_iter_ = \
                _nipals_twoblocks_inner_loop(
                    X=Xk, Y=Yk, mode=self.mode, max_iter=self.max_iter,
                    tol=self.tol, norm_y_weights=self.norm_y_weights)
            self.n_iter_.append(n_iter_)

            x_weights, y_weights = svd_flip(x_weights, y_weights.T)
            y_weights = y_weights.T

            x_scores = np.dot(Xk, x_weights)
            if self.norm_y_weights:
                y_ss = 1
            else:
                y_ss = np.dot(y_weights.T, y_weights)
            y_scores = np.dot(Yk, y_weights) / y_ss

            # Squared norm of the X scores, reused for both loadings.
            x_ss = np.dot(x_scores.T, x_scores)
            if x_ss < np.finfo(np.double).eps:
                warnings.warn('X scores are null at iteration %s' % k)
                break
            x_loadings = np.dot(Xk.T, x_scores) / x_ss

            # Deflate X, then regress Y on the X scores and deflate Y.
            Xk -= np.dot(x_scores, x_loadings.T)
            y_loadings = np.dot(Yk.T, x_scores) / x_ss
            Yk -= np.dot(x_scores, y_loadings.T)
            self.x_scores_[:, k] = x_scores.ravel()  # T
            self.y_scores_[:, k] = y_scores.ravel()  # U
            self.x_weights_[:, k] = x_weights.ravel()  # W
            self.y_weights_[:, k] = y_weights.ravel()  # C
            self.x_loadings_[:, k] = x_loadings.ravel()  # P
            self.y_loadings_[:, k] = y_loadings.ravel()  # Q

        # T = X W(P'W)^-1 = XW* (W* : p x k matrix)
        # U = Y C(Q'C)^-1 = YC* (W* : q x k matrix)
        self.x_rotations_ = np.dot(
            self.x_weights_,
            pinv2(np.dot(self.x_loadings_.T, self.x_weights_),
                  check_finite=False))
        if Y.shape[1] > 1:
            self.y_rotations_ = np.dot(
                self.y_weights_,
                pinv2(np.dot(self.y_loadings_.T, self.y_weights_),
                      check_finite=False))
        else:
            self.y_rotations_ = np.ones(1)

        self.coef_ = np.dot(self.x_rotations_, self.y_loadings_.T)
        self.coef_ = self.coef_ * self.y_std_
        self.coef_ = np.asarray(self.coef_, dtype=X.dtype)
        self.compute_b(X)

        return (self.coef_, self.y_mean_, self.x_mean_, self.x_std_, x_weights)

    def compute_b(self, X):
        """Compute the intercept ``self.b`` used by ``predict``.

        The expression below reduces algebraically to
        ``y_mean_ - x_mean_ . coef_``; only its ``[0][0]`` entry is kept.
        """
        X = check_array(X, copy=True, dtype=FLOAT_DTYPES)
        X1 = X.copy()
        X -= self.x_mean_
        self.b = (np.dot(X, self.coef_) + self.y_mean_ - np.dot(X1, self.coef_))[0][0]

    def predict(self, X, copy=True):
        """Return predictions for X, computed as ``X . coef_ + b``."""
        X = check_array(X, copy=copy, dtype=FLOAT_DTYPES)
        Ypred = np.dot(X, self.coef_)
        return Ypred + self.b

 

# The data to be scored is a DataFrame with column headers; target specifies the column that holds the target variable

def apply(lists):
    """Train a ``pls_zh`` model on the given data and return its predictions.

    Parameters
    ----------
    lists : list
        ``lists[0]`` is the data to score: a DataFrame with column headers,
        or a string that is handed to ``pd.read_csv``.
        ``lists[1]`` is the parameter string, for example
        ``{target:0,n_components:3,mode:'A',scale:False}``.  ``target`` is
        required and is either an integer column position (0 is the first
        column) or a column name.  All other keys are optional and fall
        back to the defaults of ``pls_zh``.

    Returns
    -------
    list or None
        A one-element list holding a DataFrame with a single ``value``
        column of predictions for the input rows, or ``None`` when fewer
        than two arguments are supplied or ``target`` is not set.
    """
    import re  # local import: only needed here

    if len(lists) < 2:
        return None

    # Read in data
    data = lists[0]
    val = lists[1]
    if isinstance(data, str):
        data = pd.read_csv(data)

    # Quote bare True/False/None tokens (any letter case) so that they are
    # decoded as the strings 'True'/'False'/'None'.  Tokens adjacent to a
    # quote or to a word character are left alone, and the rest of the
    # string keeps its case, so values such as mode:'B' or a column-name
    # target are no longer altered.
    val = re.sub(r"(?<![\w'\"])(true|false|none)(?![\w'\"])",
                 lambda m: "'%s'" % m.group(1).capitalize(),
                 val, flags=re.IGNORECASE)
    dic = demjson.decode(val)
    if 'target' not in dic:
        print("param target is not set")
        return None

    targ = dic['target']
    if isinstance(targ, int):
        # An integer target is a column position.
        colname = data.columns.tolist()[targ]
    else:
        colname = targ
    Y = data[colname]
    X = data.drop(colname, axis=1)

    def option(key, default):
        # A missing, empty, zero or 'None' value falls back to the default.
        value = dic.get(key)
        if not value or value == "None":
            return default
        return value

    def flag(key, default):
        # Booleans arrive as the strings 'True'/'False'; anything else
        # (including a missing key) yields the default.
        text = str(dic.get(key)).strip().lower()
        if text == "true":
            return True
        if text == "false":
            return False
        return default

    # The model parameter
    n_components = option('n_components', 15)
    deflation_mode = option('deflation_mode', "regression")
    mode = option('mode', "A")
    norm_y_weights = flag('norm_y_weights', False)
    scale = flag('scale', False)
    algorithm = option('algorithm', "nipals")
    max_iter = option('max_iter', 500)
    tol = option('tol', 1e-06)
    copy = flag('copy', True)

    # Initialize the model
    pls_model = pls_zh(n_components,
                       deflation_mode,
                       mode,
                       norm_y_weights,
                       scale,
                       algorithm,
                       max_iter,
                       tol,
                       copy)

    # Training data
    pls_model.fit(X, Y)

    # Scoring
    y_pred = pls_model.predict(X)

    # Wrap the predictions in a single-column DataFrame inside a list.
    df = pd.DataFrame(y_pred, columns=["value"])
    return [df]

 

if __name__ == '__main__':
    # Stand-alone run: score a CSV file, using its first column (target:0)
    # as the target variable.  Everything stays under the guard so that
    # importing the module does not trigger a run.
    ls = []
    ls.append("a2ef764c53ec1fbc_X.new.csv")
    val = "{target:0,n_components:3,deflation_mode:'regression'," \
          " mode:'a',norm_y_weights:False," \
          " scale:False,algorithm:'nipals'," \
          " max_iter:500,tol:0.000001,copy:True}"

    ls.append(val)
    apply(ls)

 

The dfx file that calls the Python module file:

 

 

A

B

1

=ym_env()

 

2

="D:/script/pls_zh.py"

 

3

=file("D:/script/data_test.csv").import@cqt()

//To-be-scored data

4

{target:0,n_components:3,deflation_mode:'regression',

  mode:'A',norm_y_weights:False,

  scale:False,algorithm:'nipals',

  max_iter:500,tol:0.000001,copy:True}

//The first column is the target variable, which is defined through a JSON string

5

=ym_exec(A2, A3, A4)

 

6

>ym_close(A2)

 

 

Below is the content of data_test.csv:

0

1

2

3

4

5

6

7

8

9

181.6

-0.00182

-0.00796

-0.00748

-0.00286

0.004846

0.015545

0.028104

0.039865

0.046408

154.5

-0.00102

-0.00789

-0.00795

-0.00361

0.004065

0.015055

0.028321

0.041063

0.048227

195

0.001206

-0.00464

-0.00404

0.000681

0.008794

0.020834

0.036321

0.051656

0.059063

150.8

-0.00154

-0.00802

-0.00768

-0.0028

0.00554

0.01712

0.03072

0.043453

0.050239