Description:
Execute a Python file to define a data file and the target variable for data scoring.
Syntax:
ym_exec(pyfile,data, jsonstr)
Note:
This external library function (see the External Library
Guide) executes a Python file. Parameter data is a table
sequence or a table with column headers; it supplies the data set that contains the
target variable. The target variable is specified through parameter jsonstr, a JSON
string that names the column holding the target variable. Both data and jsonstr are passed to the
Python file pyfile.
Each parameter passed in
needs to match what the receiving Python file expects, so that the Python file can parse
it correctly.
Parameter:
pyfile |
A Python module file that implements the def apply(lists) interface; parameter lists is of the list data type |
data |
A data file |
jsonstr |
A JSON string like: {target:0,n_components:3,deflation_mode:'regression', mode:'A',norm_y_weights:False, scale:False,algorithm:'nipals', max_iter:500,tol:0.000001,copy:True}, in which target specifies the column holding the target variable |
Return value:
Table sequence
Example:
Example 1: To make an ordinary call from the Python file, use the following demo.py file:
interface def apply(lists)
# -*- coding: utf-8 -*-
def apply(lists):
    """Entry point invoked with the list of passed-in parameters.

    Wraps ``lists`` in a ``demo`` object and returns the list produced by
    its ``do_print()`` method.
    """
    return demo(lists).do_print()
class demo():
    """Minimal example class that echoes the parameters it was given."""

    def __init__(self, lists):
        # Keep a reference to the parameter list passed to the constructor.
        self.lists = lists

    def count(self, num):
        """Return ``1 + 10 * num``."""
        return 1 + 10 * num

    def do_print(self):
        """Return a new list: the stored items followed by their count."""
        ls = list(self.lists)
        # The trailing element records how many items were received.
        ls.append(len(ls))
        return ls
if __name__ == "__main__":
    # Local smoke test: run apply() on a sample list and show the result.
    ls = [1, 2, 3, 5, 6, 8]
    res = apply(ls)
    print('res={}'.format(res))
The dfx script that calls the Python file:
|
A |
B |
1 |
=ym_env() |
|
2 |
=date("1982/08/09") |
|
3 |
="C:/Program Files/raqsoft/yimming/store/script/demo.py" |
|
4 |
=file("D:/dev/workspace/pyjava/data2/train_t.csv").import@cqt() |
|
5 |
=time@s(now()) |
=datetime@s(now()) |
6 |
=ym_exec(A3, A4, false, 12345, 10737418240, 123.45, decimal(1234567890123456), "aaa中国123", A2, A5, B5) |
//The parameter can be any data type |
7 |
=ym_exec(A4, A1) |
|
8 |
>ym_close(A2) |
|
Example 2: To make the call using the Python module algorithm, use pls_zh.py file below:
from scipy.linalg import pinv2
from sklearn.utils.validation import *
from sklearn.utils.extmath import *
from sklearn.cross_decomposition.pls_ import _nipals_twoblocks_inner_loop
import numpy as np
import pandas as pd
import demjson
class pls_zh():
    """PLS-style regression model built on scikit-learn's NIPALS inner loop.

    Parameters
    ----------
    n_components : number of components extracted by ``fit``.
    deflation_mode : stored on the instance; not read by the methods below.
    mode, max_iter, tol, norm_y_weights : forwarded to
        ``_nipals_twoblocks_inner_loop`` in ``fit``.
    scale : stored on the instance; ``center_scale_xy`` only centers and
        always reports unit standard deviations.
    algorithm : stored on the instance; not read by the methods below.
    copy : passed to ``check_array`` in ``fit``.

    NOTE(review): ``pinv2`` and ``_nipals_twoblocks_inner_loop`` come from the
    file's top-level imports and are tied to specific SciPy / scikit-learn
    releases -- confirm the installed versions still provide them.
    """

    def __init__(self,
                 n_components=15,
                 deflation_mode="regression",
                 mode="A",
                 norm_y_weights=False,
                 scale=False,
                 algorithm="nipals",
                 max_iter=500,
                 tol=1e-06,
                 copy=True):
        self.n_components = n_components
        self.deflation_mode = deflation_mode
        self.mode = mode
        self.norm_y_weights = norm_y_weights
        self.scale = scale
        self.algorithm = algorithm
        self.max_iter = max_iter
        self.tol = tol
        self.copy = copy
        # Echo the effective configuration to stdout.
        print("n_components={};deflation_mode={};mode={};norm_y_weights={}; scale={}; algorithm={};max_iter={};tol={};copy={}".format(
            n_components, deflation_mode, mode, norm_y_weights, scale, algorithm, max_iter, tol, self.copy))

    def center_scale_xy(self, X, Y):
        """Center ``X`` and ``Y`` column-wise, in place.

        Returns ``(X, Y, x_mean, y_mean, x_std, y_std)``; both ``*_std``
        arrays are all ones because no scaling is applied.
        """
        # center
        x_mean = X.mean(axis=0)
        X -= x_mean
        y_mean = Y.mean(axis=0)
        Y -= y_mean
        # scale (identity: unit standard deviations)
        x_std = np.ones(X.shape[1])
        y_std = np.ones(Y.shape[1])
        return X, Y, x_mean, y_mean, x_std, y_std

    def fit(self, X, Y):
        """Fit the model and return its main parameters.

        Returns the tuple ``(coef_, y_mean_, x_mean_, x_std_, x_weights)``,
        where ``x_weights`` is the weight vector from the last executed
        iteration of the component loop.
        """
        # Imported locally so the warning below does not depend on names
        # leaked by the file's wildcard imports.
        import warnings

        check_consistent_length(X, Y)
        X = check_array(X, dtype=np.float64, copy=self.copy,
                        ensure_min_samples=2)
        Y = check_array(Y, dtype=np.float64,
                        copy=self.copy, ensure_2d=False)
        if Y.ndim == 1:
            Y = Y.reshape(-1, 1)
        n = X.shape[0]
        p = X.shape[1]
        q = Y.shape[1]
        X, Y, self.x_mean_, self.y_mean_, self.x_std_, self.y_std_ = (
            self.center_scale_xy(X, Y))
        # Residuals (deflated) matrices; these alias X and Y, so the
        # in-place deflation below also modifies X and Y.
        Xk = X
        Yk = Y
        # Results matrices
        self.x_scores_ = np.zeros((n, self.n_components))
        self.y_scores_ = np.zeros((n, self.n_components))
        self.x_weights_ = np.zeros((p, self.n_components))
        self.y_weights_ = np.zeros((q, self.n_components))
        self.x_loadings_ = np.zeros((p, self.n_components))
        self.y_loadings_ = np.zeros((q, self.n_components))
        self.n_iter_ = []
        Y_eps = np.finfo(Yk.dtype).eps
        for k in range(self.n_components):
            # Zero out Y columns that are numerically negligible.
            Yk_mask = np.all(np.abs(Yk) < 10 * Y_eps, axis=0)
            Yk[:, Yk_mask] = 0.0
            x_weights, y_weights, n_iter_ = \
                _nipals_twoblocks_inner_loop(
                    X=Xk, Y=Yk, mode=self.mode, max_iter=self.max_iter,
                    tol=self.tol, norm_y_weights=self.norm_y_weights)
            self.n_iter_.append(n_iter_)
            x_weights, y_weights = svd_flip(x_weights, y_weights.T)
            y_weights = y_weights.T
            x_scores = np.dot(Xk, x_weights)
            if self.norm_y_weights:
                y_ss = 1
            else:
                y_ss = np.dot(y_weights.T, y_weights)
            y_scores = np.dot(Yk, y_weights) / y_ss
            # Stop extracting components once the X scores vanish.
            if np.dot(x_scores.T, x_scores) < np.finfo(np.double).eps:
                warnings.warn('X scores are null at iteration %s' % k)
                break
            # Deflate X and Y by the part explained by the current X scores.
            x_loadings = np.dot(Xk.T, x_scores) / np.dot(x_scores.T, x_scores)
            Xk -= np.dot(x_scores, x_loadings.T)
            y_loadings = (np.dot(Yk.T, x_scores)
                          / np.dot(x_scores.T, x_scores))
            Yk -= np.dot(x_scores, y_loadings.T)
            self.x_scores_[:, k] = x_scores.ravel()  # T
            self.y_scores_[:, k] = y_scores.ravel()  # U
            self.x_weights_[:, k] = x_weights.ravel()  # W
            self.y_weights_[:, k] = y_weights.ravel()  # C
            self.x_loadings_[:, k] = x_loadings.ravel()  # P
            self.y_loadings_[:, k] = y_loadings.ravel()  # Q
        # T = X W(P'W)^-1 = XW* (W* : p x k matrix)
        # U = Y C(Q'C)^-1 = YC* (W* : q x k matrix)
        self.x_rotations_ = np.dot(
            self.x_weights_,
            pinv2(np.dot(self.x_loadings_.T, self.x_weights_),
                  check_finite=False))
        if Y.shape[1] > 1:
            self.y_rotations_ = np.dot(
                self.y_weights_,
                pinv2(np.dot(self.y_loadings_.T, self.y_weights_),
                      check_finite=False))
        else:
            self.y_rotations_ = np.ones(1)
        self.coef_ = np.dot(self.x_rotations_, self.y_loadings_.T)
        self.coef_ = self.coef_ * self.y_std_
        self.coef_ = np.asarray(self.coef_, dtype=X.dtype)
        self.compute_b(X)
        return (self.coef_, self.y_mean_, self.x_mean_, self.x_std_, x_weights)

    def compute_b(self, X):
        """Compute the scalar intercept ``self.b`` used by ``predict``.

        The X terms cancel algebraically, leaving
        ``y_mean_ - x_mean_ . coef_``; ``[0][0]`` keeps the first row and
        first column of that result.
        """
        X = check_array(X, copy=True, dtype=FLOAT_DTYPES)
        X1 = X.copy()
        X -= self.x_mean_
        self.b = (np.dot(X, self.coef_) + self.y_mean_ - np.dot(X1, self.coef_))[0][0]

    def predict(self, X, copy=True):
        """Return ``X . coef_ + b`` for the validated input ``X``."""
        X = check_array(X, copy=copy, dtype=FLOAT_DTYPES)
        Ypred = np.dot(X, self.coef_)
        return Ypred + self.b
# The to-be-scored data is a file with column headers in dataframe, where target specifies the column which is the target variable
def _normalize_literals(text):
    """Quote bare true/false/none tokens (any letter case) as 'True'/'False'/'None'.

    Quoted strings are copied through untouched, so string values such as
    mode:'A' and column names keep their original letter case.
    """
    import re

    pattern = re.compile(
        r"""('(?:\\.|[^'\\])*'|"(?:\\.|[^"\\])*")|\b(true|false|none)\b""",
        re.IGNORECASE)

    def _quote(match):
        # Group 1 is an already-quoted string: leave it exactly as written.
        if match.group(1) is not None:
            return match.group(1)
        return "'{}'".format(match.group(2).capitalize())

    return pattern.sub(_quote, text)


def _option(params, key, default):
    """Return params[key], or default when it is missing, falsy or 'None'."""
    value = params.get(key)
    if not value or value == "None":
        return default
    return value


def _flag(params, key, default):
    """Read a boolean option written as True/False; otherwise return default."""
    value = params.get(key)
    if value is True or value == "True":
        return True
    if value is False or value == "False":
        return False
    return default


def apply(lists):
    """Train a ``pls_zh`` model on the given data and score that same data.

    ``lists[0]`` is the data: either a DataFrame with column headers or a
    string, which is read with ``pd.read_csv``. ``lists[1]`` is the
    JSON-like option string; its ``target`` entry selects the target
    column, either by integer position or by column name. The remaining
    entries are model options, each falling back to a default when omitted.

    Returns a one-element list holding a DataFrame with a single
    ``value`` column of predictions, or ``None`` when fewer than two
    parameters are passed or ``target`` is not set.
    """
    if len(lists) < 2:
        return None

    # Read in data
    data = lists[0]
    val = lists[1]
    if isinstance(data, str):
        data = pd.read_csv(data)

    # Only the bare literals are rewritten; lower-casing the whole string
    # would also alter string values and column names.
    decoded = demjson.decode(_normalize_literals(val))
    # Option names stay case-insensitive, as they were when the entire
    # string was lower-cased.
    dic = {str(key).lower(): value for key, value in decoded.items()}

    if 'target' not in dic:
        print("param target is not set")
        return None
    targ = dic['target']
    if isinstance(targ, int):
        # Integer target: positional index into the column headers.
        colname = data.columns.tolist()[targ]
    else:
        colname = targ
    Y = data[colname]
    X = data.drop(colname, axis=1)

    # Initialize the model; every option is optional.
    pls_model = pls_zh(_option(dic, 'n_components', 15),
                       _option(dic, 'deflation_mode', "regression"),
                       _option(dic, 'mode', "A"),
                       _flag(dic, 'norm_y_weights', False),
                       _flag(dic, 'scale', False),
                       _option(dic, 'algorithm', "nipals"),
                       _option(dic, 'max_iter', 500),
                       _option(dic, 'tol', 1e-06),
                       _flag(dic, 'copy', True))
    # Training data
    pls_model.fit(X, Y)
    # Scoring
    y_pred = pls_model.predict(X)
    df = pd.DataFrame(y_pred, columns=["value"])
    return [df]
if __name__ == '__main__':
    # Local smoke test: score a sample CSV, using its first column
    # (target:0) as the target variable.
    val = "{target:0,n_components:3,deflation_mode:'regression'," \
          " mode:'A',norm_y_weights:False," \
          " scale:False,algorithm:'nipals'," \
          " max_iter:500,tol:0.000001,copy:True}"
    ls = ["a2ef764c53ec1fbc_X.new.csv", val]
    apply(ls)
The dfx file that calls the Python module file:
|
A |
B |
1 |
=ym_env() |
|
2 |
="D:/script/pls_zh.py" |
|
3 |
=file("D:/script/data_test.csv").import@cqt() |
//To-be-scored data |
4 |
{target:0,n_components:3,deflation_mode:'regression', mode:'A',norm_y_weights:False, scale:False,algorithm:'nipals', max_iter:500,tol:0.000001,copy:True} |
//The first column is the target variable, which is defined through a JSON string |
5 |
=ym_exec(A2, A3, A4) |
|
6 |
>ym_close(A2) |
|
Below is the content of data_test.csv:
0 |
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
181.6 |
-0.00182 |
-0.00796 |
-0.00748 |
-0.00286 |
0.004846 |
0.015545 |
0.028104 |
0.039865 |
0.046408 |
154.5 |
-0.00102 |
-0.00789 |
-0.00795 |
-0.00361 |
0.004065 |
0.015055 |
0.028321 |
0.041063 |
0.048227 |
195 |
0.001206 |
-0.00464 |
-0.00404 |
0.000681 |
0.008794 |
0.020834 |
0.036321 |
0.051656 |
0.059063 |
150.8 |
-0.00154 |
-0.00802 |
-0.00768 |
-0.0028 |
0.00554 |
0.01712 |
0.03072 |
0.043453 |
0.050239 |