PROGRAMMING

  minte9
learningjourney




S R Q

Make blobs

p25 Datasets that work well with clustering techniques.
 
""" Make blobs
Generate a simulated dataset that works well with clustering techniques.
"""

import matplotlib.pyplot as plt
from sklearn.datasets import make_blobs

# 100 two-dimensional points grouped around three centers.
# random_state pins the draw so the printed values below are reproducible.
features, target = make_blobs(
    n_samples=100,
    n_features=2,
    centers=3,  # three target classes
    cluster_std=0.5,
    shuffle=True,
    random_state=1,
)

# Feature matrix (first three rows)
print(features[:3])
# [[-1.22685609    3.25572052]
#  [-9.57463218   -4.38310652]
#  [-10.71976941  -4.20558148]]

# Target vector (first ten cluster labels)
print(target[:10])
# [0 1 1 1 2 2 2 1 0 0]

# Color every point by the cluster it was generated from.
plt.scatter(features[:, 0], features[:, 1], c=target)
plt.title('Make blob - Simulated dataset')
plt.show()
Make classification

Make classification

p25 Simulated data for classification.
 
""" Make classification
Create simulated data for a binary classification problem.
"""

import matplotlib.pyplot as plt
from sklearn.datasets import make_classification

# 100 samples with two informative features and no redundant ones.
# weights makes the classes imbalanced (roughly 25% class 0, 75% class 1).
features, target = make_classification(
    n_samples=100,
    n_features=2,
    n_informative=2,
    n_redundant=0,
    n_classes=2,
    weights=[0.25, 0.75],
    random_state=1,
)

# Feature matrix (first three rows)
print(features[:3])
# [[ 1.30022717 -0.7856539 ]
#  [ 1.44184425 -0.56008554]
#  [-0.84792445 -1.36621324]]

# Target vector (first ten labels)
print(target[:10])
# Prints ten labels, each either 0 or 1: with n_classes = 2 the
# value 2 can never appear in the target vector.

# Color every point by its class label.
plt.scatter(features[:, 0], features[:, 1], c=target)
plt.title('Make classification - Simulated dataset')
plt.show()
Make regression

Make regression

p24 Dataset designed to be used with linear regression.
 
""" Make regression
Generate a dataset designed to be used with linear regression.
"""

import matplotlib.pyplot as plt
from sklearn.datasets import make_regression

# Sample dataset: 100 samples, three features (all informative), one
# noise-free target. coef=True makes make_regression also return the
# coefficients of the underlying linear model.
features, target, coef = make_regression(
    n_samples=100,
    n_features=3,
    n_informative=3,
    n_targets=1,
    noise=0,
    coef=True,
    random_state=1,
)

# Feature matrix (first three rows)
print(features[:3])
# [[ 1.29322588 -0.61736206 -0.11044703]
#  [-2.793085    0.36633201  1.93752881]
#  [ 0.80186103 -0.18656977  0.0465673 ]]

# Target vector (first three elements)
print(target[:3])
# [-10.37865986  25.5124503   19.67705609]

Salary Example

Suppose we have a survey of the employees of a company.
 
""" Simulated dataset (experience / salary)

Simulate the data for building a regression model.
Suppose we have a survey among the employees of a company.

As a developer, you often have no access to the survey data,
so you need to simulate the data for building the regression model.
"""

import matplotlib.pyplot as plt
from sklearn.datasets import make_regression
from sklearn.linear_model import LinearRegression
import numpy as np
import pandas as pd

# Sample dataset: one feature, one target, with some noise added.
X, y = make_regression(
    n_samples=100,
    n_features=1,
    n_informative=1,
    n_targets=1,
    noise=10,
    coef=False,
    random_state=0,
)
print(X[:1])  # [[-0.35955316]]
print(y[:1])  # [-19.95588561]


# Scale: linearly map the raw values onto realistic ranges.
X = np.interp(X, (X.min(), X.max()), (0, 20))           # years of experience
y = np.interp(y, (y.min(), y.max()), (10000, 200000))   # salary

# Dataframe
df = pd.DataFrame(data={'Experience': X.flatten(), 'Salary': y})
print(df.head(2))
#  Experience         Salary
#    9.096218   95224.004179
#   14.637429  132619.663538

# Plot dataset points
plt.scatter(X, y, label='training data')
plt.title('Simulated dataset (Experience / Salary)')


# Fit a linear regression model
reg = LinearRegression().fit(X, y)

# Plot the regression line across the observed range of experience.
x_line = np.linspace(np.min(X), np.max(X), 100)
y_line = reg.intercept_ + x_line * reg.coef_[0]
plt.plot(x_line, y_line, color='red', label='prediction')

# Annotate the plot with the fitted equation.
plt.text(10, 25000, f'y = {reg.intercept_:0.2f} + {reg.coef_[0]:0.2f} x')
plt.xlabel('Years of experience')
plt.ylabel('Salary')
plt.legend()
plt.show()

Questions    
Last update: 46 days ago