Question Details

(solution) My program is almost finished. The only function that isn't


My program is almost finished. The only function that isn't working is my learn1(). It uses supervised machine learning (SVM) to analyze the file 'wimbledon_men'. I don't know how to fix it. It may be related to my load_data function, specifically this part:

for row in contents:
#print ', '.join(row)
#print row[col1]
#print row[col2]
x.append(row[col1]) # column col1
y.append(row[col2]) # column col2


#Teodora Ivanovic

 

#250809167

 

# imports

 

import csv

 

import random

 

import matplotlib.pyplot as plt

 

import collections

 

import pylab

 

import numpy as np

 

from sklearn import datasets

 

from sklearn import svm

 

from matplotlib import style

 

from sklearn.cluster import KMeans

 

# functions

 

def load_data(filename,col1,col2):

 

"""

 

This function loads the file in csv reader format and returns a list of

 

lists containing the dataset of top restaurants according to consumer

 

preferences.

 

y will return all of the elements pulled from the list.

 

param filename: name of the file you want to load.

 

"""

 

x = y = with open(filename, 'r') as csvfile:

 

contents = csv.reader(csvfile, delimiter=',')

 

for row in contents:

 

#print ', '.join(row)

 

#print row[col1]

 

#print row[col2]

 

x.append(row[col1])

 

# column col1

 

y.append(row[col2])

 

# column col2

 

#for i in x: #every line that is selected from the file will be added to the

 

empty list created above

 

#

 

y.append(x)

 

if x[0] is not int:

 

x = x[1:len(x)]

 

# skip the header

 

y = y[1:len(y)]

 

# skip the header

 

if x[0] is str:

 

x = map(int, x)

 

# return as integers

 

y = map(int, y)

 

# return as integers

 

return (x,y)

 

def shell(n):

 

"""

 

This function uses shell sort to arrange the data sequence in order.

 

It works by arranging the data sequence in a two-dimensional array and then

 

sorts the columns of the array using insertion sort.

 

param n - size of sequence

 

"""

 

inc = len(n) // 2

 

while inc:

 

for i, key in enumerate(n):

 

while i >= inc and n[i - inc] > key:

 

n[i] = n[i - inc]

 

i -= inc n[i] = key

 

inc = 1 if inc == 2 else int(inc/2.2)

 

#test harness

 

def part1():

 

"""

 

This function calls on the shell sorting algorithm.

 

"""

 

data = [22,7,2,-5,8,4,1224,45,0,32,19,13,124]

 

print data

 

shell(data)

 

print data

 

part1()

 

#pie chart

 

def viz1():

 

with open('userpayment.csv', 'r') as f:

 

data_s = collections.Counter([line.split(',')[1].strip() for line in

 

f.readlines()]).most_common()

 

x = [app[1] for app in data_s]

 

l = [app[0] for app in data_s]

 

plt.pie(x)

 

plt.legend(l, loc='best')

 

plt.show()

 

#scatterplot

 

def viz2(datax,datay):

 

"""

 

This function plots the values from the file in a scatterplot to test

 

whether there is a positive correlation.

 

The file used contains tennis match statistics for men at the 2013 Wimbledon

 

tournament.

 

The specific characteristics graphed in this function are Second Serve

 

Percentage for

 

player 1 and Second Serve Won by player 1.

 

param datax - first set of values you want to plot

 

param datay - second set of values you want to plot

 

"""

 

SSP1 = datax

 

SSW1 = datay

 

plt.xlabel('Second serve percentage for player')

 

plt.ylabel('Second serve won by player')

 

plt.scatter(SSP1,SSW1)

 

plt.show()

 

#svm - supervised machine learning

 

def learn1():

 

from sklearn import svm

 

(a,b) = load_data('wimbledon_men.csv',6,8)

 

plt.scatter(a,b)

 

plt.show()

 

clf = svm.SVC()

 

print len(a[0])

 

print len(b[0])

 

print a

 

print b

 

clf.fit(a,b)

 

SVC(C=1.0, cache_size=200, class_weight=None,

 

coef0=0.0,decision_function_shape=None, degree=3, gamma='auto',

 

kernel='rbf',max_iter=-1, probability=False, random_state=None,

 

shrinking=True,tol=0.001, verbose=False) clf.predict([[2., 2.]])

 

array([1])

 

#k-means clustering

 

def learn2():

 

"""

 

This function clusters data points in the file in order to find a

 

correlation

 

between SSP1 and SSW1.

 

"""

 

style.use('ggplot')

 

(learnx,learny) = load_data('wimbledon_men.csv',9,10)

 

plt.scatter(learnx,learny)

 

plt.show()

 

X = [[learnx[i], learny[i]] for i in range(len(learnx))]

 

kmeans = KMeans(n_clusters=2)

 

kmeans.fit(X)

 

centroids = kmeans.cluster_centers_ #data point at the centre of the cluster

 

labels = kmeans.labels_

 

print(centroids)

 

print(labels)

 

colors = ['g.','r.']

 

for i in range(len(X)):

 

print('coordinate:',X[i],'label:',labels[i])

 

plt.plot(X[i][0], X[i][1], colors[labels[i]], markersize = 10)

 

plt.scatter(centroids[:,0], centroids[:,1], marker = 'x', s=150, linewidths

 

=5, zorder = 10)

 

plt.show()

 

def part2():

 

"""

 

This functions calls on all of the functions after part1()

 

"""

 

viz1()

 

# test load data with 'wimbledon_men.csv'

 

(myx,myy) = load_data('wimbledon_men.csv',9,10)

 

# scatter plot

 

viz2(myx,myy)

 

# call supervised machine learning

 

learn1()

 

# call k-means clustering

 

learn2()

 


Solution details:
STATUS
Answered
QUALITY
Approved
ANSWER RATING

This question was answered on: Jan 30, 2021

PRICE: $15

Solution~0001016875.zip (25.37 KB)

Buy this answer for only: $15

This attachment is locked

We have a ready expert answer for this paper which you can use for in-depth understanding, research editing or paraphrasing. You can buy it or order for a fresh, original and plagiarism-free solution (Deadline assured. Flexible pricing. TurnItIn Report provided)

Pay using PayPal (No PayPal account Required) or your credit card . All your purchases are securely protected by .
SiteLock

About this Question

STATUS

Answered

QUALITY

Approved

DATE ANSWERED

Jan 30, 2021

EXPERT

Tutor

ANSWER RATING

GET INSTANT HELP/h4>

We have top-notch tutors who can do your essay/homework for you at a reasonable cost and then you can simply use that essay as a template to build your own arguments.

You can also use these solutions:

  • As a reference for in-depth understanding of the subject.
  • As a source of ideas / reasoning for your own research (if properly referenced)
  • For editing and paraphrasing (check your institution's definition of plagiarism and recommended paraphrase).
This we believe is a better way of understanding a problem and makes use of the efficiency of time of the student.

NEW ASSIGNMENT HELP?

Order New Solution. Quick Turnaround

Click on the button below in order to Order for a New, Original and High-Quality Essay Solutions. New orders are original solutions and precise to your writing instruction requirements. Place a New Order using the button below.

WE GUARANTEE, THAT YOUR PAPER WILL BE WRITTEN FROM SCRATCH AND WITHIN A DEADLINE.

Order Now