# csvpredtrain.py

# 0 0 0
                                        

import csv

import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.model_selection import KFold

"This code moves data from two other CSV files into the appropriate columns of another CSV file, then calculates money conversion rates, a logistic regression, k-fold accuracy, and the feature importances of at least 8 parameters. The required libraries are assumed to be installed already; they are imported above."

"Citations: karthikeya boyini, niklila garwal, user81530, robert g brown, di candice han, wolf29, snippsat, jean francois fabre, bigfoot29, abishek wasnik"

"Written by: me"

# Scratch counters kept from the original script; nothing in the visible
# file reads them.
w = 0
j = 0
t = 0

# Header of the output sheet, in column order.
fields1 = [
    'Country', 'Date', 'Accounts Payable', 'Cost', 'Company',
    'USD to Country Unit', 'Orders', 'Proj Payable', 'Proj Cost',
    'Proj Orders', 'NH_REPUN', 'NH_ACCDOC', 'NH_NETDUE', 'NH_VCLRDT',
    '0CALDAY', 'NH_VEN_TXT', 'CUR002', '00O2SPADWAPXIIOCRYG5A9QEI',
    'av kfold acc', 'importance',
]

# Value for the 'USD to Country Unit' column, keyed by the 'Country' cell.
# The numbers are copied from the original per-country branches.
usd_rates = {'CN': .15, 'DE': .6, 'SZ': 1.1, 'IR': 1.2, 'RU': .013}


def usd_rate(country, default=''):
    """Return the conversion rate for *country*.

    Args:
        country: value of a row's 'Country' cell, e.g. ``'CN'``.
        default: value returned for a country that has no rate; a blank
            cell by default.
    """
    return usd_rates.get(country, default)


def project(values):
    """Return the projected value for every entry of one numeric column.

    Each projection is the entry minus ``sum(values) / n**2``, where ``n``
    is the number of entries.

    NOTE(review): the original wrote ``col - sum(col) / (rows1 * rows1)``;
    ``rows1 * rows1`` is read here as the squared row count -- confirm.

    Args:
        values: iterable of numbers (one column of the sheet).

    Returns:
        A list with one projection per input value; empty input gives ``[]``.
    """
    values = list(values)
    if not values:
        return []
    offset = sum(values) / (len(values) * len(values))
    return [value - offset for value in values]


def build_row(values=None):
    """Return one sheet row aligned with ``fields1``.

    Args:
        values: optional mapping of column name to cell value. Columns that
            are not mentioned stay blank, so the row always has exactly one
            cell per header field.
    """
    values = values or {}
    return [values.get(name, '') for name in fields1]


# Computed cells start out blank. The original read these names before any
# of them had been assigned, which raised NameError; accav and ce are
# assigned again by the accuracy and importance steps further down the file.
c = ''
pp = ''
pc = ''
po = ''
accav = ''
ce = ''

# Template row: blank everywhere except the computed columns.
rows1 = build_row({
    'USD to Country Unit': c,
    'Proj Payable': pp,
    'Proj Cost': pc,
    'Proj Orders': po,
    'av kfold acc': accav,
    'importance': ce,
})

# Output sheet (filename1) and the two data sets it is filled from.
filename1 = "test_sheet_to_fill.csv"
filename2 = "anonymized_aggregated_data.csv"
filename3 = "anonymized_train_data.csv"

# 'a' appends to the sheet ('a1' is not a valid open() mode); newline=''
# lets the csv module control line endings itself.
with open(filename1, 'a', newline='') as csvfile:
    csvwriter1 = csv.writer(csvfile)
    # In append mode the position starts at the end of the file, so 0 means
    # the sheet is new or empty: write the header only then, otherwise every
    # run would add another header line in the middle of the data.
    if csvfile.tell() == 0:
        csvwriter1.writerow(fields1)
    # rows1 is one flat row. writerows() would treat each cell as a row of
    # its own, so writerow() is the matching call.
    csvwriter1.writerow(rows1)

def copy_matching_columns(source_path, target_path, header, min_columns=1):
    """Append the rows of one CSV file to another, aligned to *header*.

    The first line of *source_path* is taken as its column names. For every
    following row, cells whose column name also appears in *header* are
    written under that column of *target_path*; all other target columns
    are left blank and source columns unknown to *header* are dropped.

    Args:
        source_path: CSV file to read. Assumed to start with a header line
            -- TODO confirm for both data sets.
        target_path: CSV file to append to.
        header: column names of the target file, in order.
        min_columns: rows with fewer cells than this are skipped.

    Returns:
        The number of rows appended.
    """
    copied = 0
    with open(source_path, newline='') as src, \
            open(target_path, 'a', newline='') as dst:
        reader = csv.reader(src)
        source_header = next(reader, None)
        if source_header is None:
            # Empty source file: nothing to copy.
            return copied
        writer = csv.DictWriter(
            dst, fieldnames=header, restval='', extrasaction='ignore')
        for row in reader:
            if len(row) < min_columns:
                continue
            print(row)
            writer.writerow(dict(zip(source_header, row)))
            copied += 1
    return copied


# The original loops iterated over the header names instead of the reader,
# overwrote fields1/rows1 with their loop variables, and never wrote
# anything. The column thresholds mirror the original comma counts
# (more than 0 commas -> at least 2 cells, more than 10 -> at least 12).
# NOTE(review): confirm the threshold of 12 suits the aggregated data.
copy_matching_columns(
    'anonymized_train_data.csv', 'test_sheet_to_fill.csv', fields1,
    min_columns=2)
copy_matching_columns(
    'anonymized_aggregated_data.csv', 'test_sheet_to_fill.csv', fields1,
    min_columns=12)



# Load the filled sheet. The original called test_sheet_to_fill(as_frame=True),
# a name that is never defined; the sheet is a CSV file, so read it directly.
df = pd.read_csv("test_sheet_to_fill.csv")

# Every column but the last is a feature; the last column is the label.
# NOTE(review): LogisticRegression needs numeric, non-missing features --
# confirm the sheet's columns satisfy that before relying on the scores.
X = df.iloc[:, :-1]
y = df.iloc[:, -1]

# Implementing cross validation
k = 5
kf = KFold(n_splits=k, random_state=None)
model = LogisticRegression(solver='liblinear')

acc_score = []

for train_index, test_index in kf.split(X):
    # kf.split yields positions, so select by position on both X and y
    # (plain y[...] would look the numbers up as index labels).
    X_train, X_test = X.iloc[train_index, :], X.iloc[test_index, :]
    y_train, y_test = y.iloc[train_index], y.iloc[test_index]

    model.fit(X_train, y_train)
    pred_values = model.predict(X_test)

    # accuracy_score takes (y_true, y_pred).
    acc = accuracy_score(y_test, pred_values)
    acc_score.append(acc)

avg_acc_score = sum(acc_score) / k

acf = 'accuracy of each fold - {}'.format(acc_score)
accav = 'Avg accuracy : {}'.format(avg_acc_score)

# Fit a classifier on the filled sheet and keep its per-feature importances.
train = pd.read_csv("test_sheet_to_fill.csv")

# Features are every column but the last; the last column is the label,
# the same split the K-fold step of this script uses.
# NOTE(review): the original referenced a `targets` column that the sheet
# header does not contain -- confirm which column is the real label.
feature_columns = list(train.columns[:-1])

# `YourClassifiers` was an undefined placeholder. A random forest exposes
# feature_importances_; random_state pins the result between runs.
clf = RandomForestClassifier(random_state=0)
clf.fit(train[feature_columns], train.iloc[:, -1])  # targets/labels

ce = clf.feature_importances_

"""
Disabled draft, kept for reference only (this text is a string and is never executed):

fields2=['NH_REPUN','NH_ACCDOC','NH_NETDUE','NH_VCLRDT','0CALDAY','NH_VEN_TXT','CUR002','00O2SPADWAPXIIOCRYG5A9QEI']

filename2="anonymized_aggregated_data.csv"

with open(filename2,'a2') as csvfile:

csvwriter2=csv.writer(csvfile)

csvwriter2.writerow(fields1)

csvwriter2.writerows(rows2)

My Prog Adventures - Where stories live. Discover now
"""