import csv
import pandas as pd
from sklearn.model_selection import KFold
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
"This code moves data from two other csv files to the appropriate cols in another csv file, then it calculates money conversion rates, linear regression, k fold accuraccy and importance accuracy of at least 8 parameters. libs are installed already as imports."
"Citations: karthikeya boyini, niklila garwal, user81530, robert g brown, di candice han, wolf29, snippsat, jean francois fabre, bigfoot29, abishek wasnik"
"Written by: me"
# Counters from the original script; nothing in the visible code reads them.
w = 0
j = 0
t = 0

# Column order of the sheet that gets filled.
fields1 = [
    'Country', 'Date', 'Accounts Payable', 'Cost', 'Company',
    'USD to Country Unit', 'Orders', 'Proj Payable', 'Proj Cost',
    'Proj Orders', 'NH_REPUN', 'NH_ACCDOC', 'NH_NETDUE', 'NH_VCLRDT',
    '0CALDAY', 'NH_VEN_TXT', 'CUR002', '00O2SPADWAPXIIOCRYG5A9QEI',
    'av kfold acc', 'importance',
]

# The eight technical columns.  The original listed them in a snippet that was
# switched off inside a string literal (see write_aggregated_header below).
fields2 = [
    'NH_REPUN', 'NH_ACCDOC', 'NH_NETDUE', 'NH_VCLRDT', '0CALDAY',
    'NH_VEN_TXT', 'CUR002', '00O2SPADWAPXIIOCRYG5A9QEI',
]

filename1 = "test_sheet_to_fill.csv"
filename2 = "anonymized_aggregated_data.csv"
filename3 = "anonymized_train_data.csv"

# Value of the 'USD to Country Unit' column for each 'Country' code.  Same
# numbers as the original if/elif chain, which could never match because it
# compared whole lists against strings.
CONVERSION_RATES = {'CN': .15, 'DE': .6, 'SZ': 1.1, 'IR': 1.2, 'RU': .013}

# Source column -> column that receives its projected value.
# NOTE(review): the original wrote 'Payable'; 'Accounts Payable' is the only
# matching header in fields1 -- confirm.
PROJECTED_COLUMNS = {
    'Accounts Payable': 'Proj Payable',
    'Cost': 'Proj Cost',
    'Orders': 'Proj Orders',
}

# Columns that hold model output; they are excluded from the model input.
RESULT_COLUMNS = ['av kfold acc', 'importance']


def conversion_rate(country):
    """Return the conversion rate for *country*, or None if it is not listed."""
    return CONVERSION_RATES.get(country)


def to_number(text):
    """Return *text* as a float, or None when it is missing or not numeric."""
    try:
        return float(text)
    except (TypeError, ValueError):
        return None


def projected(values):
    """Return every value minus ``sum(values) / n**2`` with ``n = len(values)``.

    This is the original expression ``x - (sum(x) / (rows * rows))`` taken
    literally.  NOTE(review): confirm this is the intended projection.
    """
    count = len(values)
    if not count:
        return []
    offset = sum(values) / (count * count)
    return [value - offset for value in values]


def read_records(path, fieldnames):
    """Read the CSV at *path* and return one dict per data row.

    The first row is taken as the header.  Only cells whose header is in
    *fieldnames* are kept, so each record can be written straight into the
    sheet.  Blank lines are skipped.  Splitting is left to ``csv.reader``; the
    original split on commas by hand and looped over the header list instead
    of the file.
    """
    wanted = set(fieldnames)
    with open(path, newline='') as handle:
        reader = csv.reader(handle)
        header = next(reader, None)
        if header is None:
            return []
        return [
            {name: cell for name, cell in zip(header, row) if name in wanted}
            for row in reader
            if row
        ]


def fill_derived_columns(records):
    """Fill the conversion-rate and projected columns of *records* in place.

    A rate is written only for countries present in CONVERSION_RATES.  A
    projected value is written only for rows whose source cell is numeric.
    Returns *records* for convenience.
    """
    for record in records:
        rate = conversion_rate(record.get('Country'))
        if rate is not None:
            record['USD to Country Unit'] = rate

    for source_column, projected_column in PROJECTED_COLUMNS.items():
        usable = []
        values = []
        for record in records:
            value = to_number(record.get(source_column))
            if value is not None:
                usable.append(record)
                values.append(value)
        for record, value in zip(usable, projected(values)):
            record[projected_column] = value
    return records


def append_rows(path, fieldnames, records):
    """Append *records* (dicts) to the CSV at *path* in *fieldnames* order.

    The header row is written only when the file is missing or empty, so
    repeated runs do not scatter header lines through the data.  Missing cells
    are written as empty strings; keys outside *fieldnames* are ignored.
    """
    import os

    needs_header = not os.path.exists(path) or os.path.getsize(path) == 0
    # 'a' replaces the original modes 'a1'/'a2'/'a3', which open() rejects.
    with open(path, 'a', newline='') as handle:
        writer = csv.DictWriter(
            handle, fieldnames=fieldnames, restval='', extrasaction='ignore',
        )
        if needs_header:
            writer.writeheader()
        writer.writerows(records)


def write_aggregated_header(rows2=(), filename=filename2):
    """Append the ``fields2`` header and then *rows2* to *filename*.

    Nothing calls this.  It preserves a snippet the original kept switched off
    inside a string literal.  NOTE(review): the snippet wrote ``fields1`` as
    the header although it had just defined ``fields2``; ``fields2`` is
    assumed to be what was meant -- confirm, or delete this function.
    """
    with open(filename, 'a', newline='') as csvfile:
        csvwriter2 = csv.writer(csvfile)
        csvwriter2.writerow(fields2)
        csvwriter2.writerows(rows2)


def split_features_target(frame, result_columns=RESULT_COLUMNS):
    """Split *frame* into ``(X, y)``: all but the last column, and the last.

    Columns named in *result_columns* are dropped first.
    NOTE(review): the original used "the last column" in one place and a
    column called ``targets`` in another; the real target column has to be
    confirmed against the data.
    """
    present = [name for name in result_columns if name in frame.columns]
    usable = frame.drop(columns=present)
    return usable.iloc[:, :-1], usable.iloc[:, -1]


def kfold_accuracy(features, target, k=5):
    """Cross-validate a liblinear LogisticRegression over *k* folds.

    Returns ``(per_fold_accuracies, mean_accuracy)``.
    """
    kf = KFold(n_splits=k, random_state=None)
    model = LogisticRegression(solver='liblinear')
    acc_score = []
    for train_index, test_index in kf.split(features):
        # KFold yields positions, so select with .iloc on both X and y.
        model.fit(features.iloc[train_index, :], target.iloc[train_index])
        pred_values = model.predict(features.iloc[test_index, :])
        acc_score.append(accuracy_score(target.iloc[test_index], pred_values))
    return acc_score, sum(acc_score) / len(acc_score)


def feature_importances(features, target, classifier=None):
    """Fit *classifier* and return its ``feature_importances_``.

    The original called an undefined placeholder, ``YourClassifiers()``.  When
    no classifier is passed, a RandomForestClassifier is used because it
    exposes ``feature_importances_``.  NOTE(review): confirm the estimator.
    """
    if classifier is None:
        from sklearn.ensemble import RandomForestClassifier
        classifier = RandomForestClassifier()
    classifier.fit(features, target)
    return classifier.feature_importances_


def main():
    """Fill the sheet from both source files, then score it.

    Steps: read the train and aggregated files, fill the derived columns,
    append the rows to the sheet, run the k-fold and importance models on the
    sheet, and append one summary row carrying the two model results.  The
    results can only be written last because they are computed from the
    sheet; the original tried to write them before they existed.
    """
    records = read_records(filename3, fields1) + read_records(filename2, fields1)
    fill_derived_columns(records)
    append_rows(filename1, fields1, records)

    sheet = pd.read_csv(filename1)
    # NOTE(review): text columns (e.g. 'Country') are passed through unchanged
    # and will be rejected by the estimators until they are encoded or
    # dropped; how to do that cannot be determined from this script.
    features, target = split_features_target(sheet)
    acc_score, avg_acc_score = kfold_accuracy(features, target)
    acf = 'accuracy of each fold - {}'.format(acc_score)
    accav = 'Avg accuracy : {}'.format(avg_acc_score)
    ce = feature_importances(features, target)
    print(acf)
    print(accav)
    append_rows(
        filename1, fields1, [{'av kfold acc': accav, 'importance': list(ce)}],
    )


if __name__ == "__main__":
    main()
# YOU ARE READING
# My Prog Adventures
# Spiritual
# I am also a programmer. I have been intending to post some of my scripts on here. I can teach math and programming; please reach out to me in conversations, as my messages on here aren't working.
