# minte9 LearnRemember

### Vector

A vector is a one-dimensional array.  """ Matrices / Vectors
Vectors are one-dimensional arrays
"""

import numpy as np

# Row vector: a one-dimensional array with shape (3,)
v1 = np.array([1, 2, 3])

# Column vector: one value per row, giving shape (3, 1)
v2 = np.array([[1], [2], [3]])

print("Row vector: \n", v1)
print("Column vector: \n", v2)

"""
Row vector:
[1 2 3]
Column vector:
[[1]
[2]
[3]]
"""


### Matrix

NumPy's main data structure is the multidimensional array.  """ Matrices / Create matrix

Numpy is the foundation of the Python machine learning stack
The main data structure is the multidimensional array

Arrays are zero-indexed, first element index is 0
Use ':' to select everything 'up to' or 'after'
"""

import numpy as np

# Matrix with three rows and four columns, holding the values 1..12
M = np.arange(1, 13).reshape(3, 4)

# One-dimensional array used for the slicing examples
v = np.arange(1, 7)

# Basic attributes of the matrix
print("M items: \n", M)
for label, value in (("M.shape =", M.shape), ("M.size =", M.size)):
    print(label, value)
print("M.ndim =", M.ndim, '\n')

# Vector slicing: ':' selects everything up to / after an index
for label, value in (
    ("v[:]  =", v[:]),
    ("v[:3] =", v[:3]),
    ("v[3:] =", v[3:]),
):
    print(label, value)
print("v[-1] =", v[-1], "\n")

# Matrix indexing: [row selection, column selection]
print("M[1, 1] second row, second column:", M[1, 1])
for label, value in (
    ("M[:2, :] up to 2 rows, all columns: \n", M[:2, :]),
    ("M[:, 1:2] all rows, second column: \n", M[:, 1:2]),
):
    print(label, value)

"""
M items:
[[ 1  2  3  4]
[ 5  6  7  8]
[ 9 10 11 12]]
M.shape = (3, 4)
M.size = 12
M.ndim = 2

v[:]  = [1 2 3 4 5 6]
v[:3] = [1 2 3]
v[3:] = [4 5 6]
v[-1] = 6

M[1, 1] second row, second column: 6
M[:2, :] up to 2 rows, all columns:
[[1 2 3 4]
[5 6 7 8]]
M[:, 1:2] all rows, second column:
[[ 2]
[ 6]
[10]]
"""


### Sparse Matrices

Sparse matrices only store non-zero elements.  """ Matrices / Sparse matrix

A sparse matrix stores only non-zero elements, for computation savings.
Compressed sparse row (CSR) matrices store the indices of the non-zero values.

Example (Netflix movies/users):
Columns are every movie on Netflix
Rows are every Netflix user
Values are how many times a user watched that movie
"""

import numpy as np
from scipy import sparse

# Dense 3x10 matrix that is almost entirely zeros
M = np.zeros((3, 10), dtype=int)
M[1, 1] = 1
M[2, 0] = 3

# CSR representation of the same data
M_sparse = sparse.csr_matrix(M)

# Random values; the seed is fixed so the output is reproducible
np.random.seed(0)
M_random1 = np.random.random(size=3)                   # floats
M_random2 = np.random.randint(low=1, high=11, size=3)  # integers

print("Sparce matrix: \n", M_sparse)
print("Random matrix of floats: \n", M_random1)
print("Random matrix of integers: \n", M_random2)

"""
Sparce matrix:
(1, 1)        1
(2, 0)        3
Random matrix of floats:
[0.5488135  0.71518937 0.60276338]
Random matrix of integers:
[ 4  8 10]
"""


### Vectorization

Vectorization is essentially a for loop.  """ Vectorization
np.vectorize is essentially a for loop; it is a convenience and does not increase performance.
Broadcasting allows operations between arrays of different dimensions.
"""

import numpy as np

M = np.array([
    [1, 2, 3],
    [4, 5, 6],
    [7, 8, 9],
])

add_100 = lambda i: i + 100                 # create function that adds 100
vectorize_add_100 = np.vectorize(add_100)   # wrap it so it applies element-wise
M_vectorized1 = vectorize_add_100(M)        # apply it to every element of M

# One line code
M_vectorized2 = np.vectorize(lambda i: i + 100)(M)
assert (M_vectorized1 == M_vectorized2).all()

# Broadcasting: the scalar 100 is added to every element of M
M_vectorized3 = M + 100
assert (M_vectorized1 == M_vectorized3).all()

print("Matrix: \n", M)
print("Vectorized 100: \n", M_vectorized1)

"""
Matrix:
[[1 2 3]
[4 5 6]
[7 8 9]]
Vectorized 100:
[[101 102 103]
[104 105 106]
[107 108 109]]
[[101 102 103]
[104 105 106]
[107 108 109]]
"""


### Count Vectorizer

We can represent texts as vectors and compute similarity.  """ Count Vectorizer
Represent texts as vectors and compute similarity.
The word london occurs 2 times in A and 1 time in B.
We can find the cos similarity between these two vectors.
"""

from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Two short documents to compare
A, B = 'London Paris London', 'Paris Paris London'

# Count word occurrences per document (one row per document)
cv = CountVectorizer()
matrix = cv.fit_transform([A, B])

# Pairwise cosine similarity between the document count vectors
similarity_scores = cosine_similarity(matrix)

print("Features names: \n", cv.get_feature_names_out())
print("Matrix vectorized: \n", matrix)
print("Matrix array: \n", matrix.toarray(), "\n")
print("Similarity scores: \n", similarity_scores)

"""
Features names:
['london' 'paris']
Matrix vectorized:
(0, 0)       2
(0, 1)        1
(1, 0)        1
(1, 1)        2
Matrix array:
[[2 1]
[1 2]]

Similarity scores:
[[1.  0.8]
[0.8 1. ]]
"""