# -*- coding: utf-8 -*-
"""
Created on Mon Mar 21 14:49:47 2016

@author: Dr. Z

Moderator analysis: measure the association between income per person and
urban rate separately for the "North" and "South" hemisphere rows of
'gapminder_with_hemispheres.csv'. For each partition the Pearson correlation
result is printed, then a scatterplot is shown.
"""

import pandas
import numpy as np
import scipy.stats
import matplotlib.pyplot as plt

DATA_FILE = 'gapminder_with_hemispheres.csv'
NUMERIC_COLUMNS = ["incomeperperson", "urbanrate"]
MODERATOR_COLUMN = "hemisphere"
HEMISPHERES = ("North", "South")


def clean_gapminder(data):
    """Return the complete rows of the three columns used by the analysis.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame containing at least the columns "incomeperperson",
        "urbanrate" and "hemisphere". It is not modified.

    Returns
    -------
    pandas.DataFrame
        A new frame holding only those three columns, with the two numeric
        columns converted to numbers and every row that has a missing value
        in any of the three columns dropped.
    """
    # Work on an explicit copy: assigning into a slice of ``data`` is
    # chained assignment and is not guaranteed to take effect.
    subframe = data[NUMERIC_COLUMNS + [MODERATOR_COLUMN]].copy()

    # ``Series.convert_objects(convert_numeric=True)`` is gone from current
    # pandas; ``to_numeric(errors="coerce")`` is its replacement. Blank or
    # otherwise unparseable cells become NaN.
    for column in NUMERIC_COLUMNS:
        subframe[column] = pandas.to_numeric(subframe[column], errors="coerce")

    # A literal "NA" hemisphere label counts as missing.
    subframe[MODERATOR_COLUMN] = subframe[MODERATOR_COLUMN].replace("NA", np.nan)

    return subframe.dropna()


def select_hemisphere(clean_data, hemisphere):
    """Return the rows of ``clean_data`` whose hemisphere equals ``hemisphere``."""
    return clean_data[clean_data[MODERATOR_COLUMN] == hemisphere]


def report_correlation(partition, hemisphere):
    """Print the Pearson correlation of income vs. urban rate for one partition."""
    print('association between Income per Person and Urban Rate for '
          + hemisphere + ' hemisphere countries')
    print(scipy.stats.pearsonr(partition['incomeperperson'],
                               partition['urbanrate']))
    print('')


def plot_partition(partition, hemisphere):
    """Show a scatterplot of income vs. urban rate for one partition."""
    plt.xlabel('Income per Person')
    plt.ylabel('Urban Rate')
    plt.title('Scatterplot for the Association Between Income per Person '
              'and Urban Rate for ' + hemisphere.upper()
              + ' hemisphere countries')
    plt.scatter(partition["incomeperperson"], partition["urbanrate"])
    plt.show()


def main():
    """Load the data set, then report and plot each hemisphere partition."""
    data = pandas.read_csv(DATA_FILE, low_memory=False)
    clean_data = clean_gapminder(data)

    # Partition on the hemisphere moderator variable.
    partitions = [(name, select_hemisphere(clean_data, name))
                  for name in HEMISPHERES]

    # All correlations are printed before any plot window is opened.
    for name, partition in partitions:
        report_correlation(partition, name)

    for name, partition in partitions:
        plot_partition(partition, name)


if __name__ == "__main__":
    main()