Question 1
Let’s use the content-based method to make
some recommendations. The last 5 things I watched on Netflix and their
attributes were:
row_names <- c("FUNNY", "ROMANTIC", "SUSPENSE", "DARK", "RATING")
col_names <- c("SHARP OBJECTS", "ARRESTED DEV.", "ARBITRAGE", "MARGIN CALL", "BOJACK", "ORPHAN BLACK", "HINTERLAND")
item <- matrix( c(0, 1, 0, 1, 1, 1, 0,
1, 1, 0, 0, 1, 0, 0,
1, 1, 1, 0, 1, 0, 1,
1, 0, 1, 1, 0, 1, 1,
4, 3, 4, 5, 3, NA, NA), byrow = TRUE, nrow = 5, ncol = 7,
dimnames = list(row_names, col_names))
item %>%
kbl() %>%
kable_styling()
|
SHARP OBJECTS
|
ARRESTED DEV.
|
ARBITRAGE
|
MARGIN CALL
|
BOJACK
|
ORPHAN BLACK
|
HINTERLAND
|
FUNNY
|
0
|
1
|
0
|
1
|
1
|
1
|
0
|
ROMANTIC
|
1
|
1
|
0
|
0
|
1
|
0
|
0
|
SUSPENSE
|
1
|
1
|
1
|
0
|
1
|
0
|
1
|
DARK
|
1
|
0
|
1
|
1
|
0
|
1
|
1
|
RATING
|
4
|
3
|
4
|
5
|
3
|
NA
|
NA
|
How similar are my preferences to Orphan
Black? R: 0.36
Calculate the cosine similarity and report to 2 decimal points
(e.g. 0.12)
First, we normalize the ratings to the mean:
rating <- matrix(c(4, 3, 4, 5, 3), nrow=1,ncol=5)
rating_m <- rating-mean(rating)
rating_m %>%
kbl() %>%
kable_styling()
We now estimate how much the ratings changes with attributes:
user <- item[1:4,1:5] %*% t(rating_m)/rowSums(item[1:4,1:5])
user %>%
kbl() %>%
kable_styling()
FUNNY
|
-0.133
|
ROMANTIC
|
-0.467
|
SUSPENSE
|
-0.300
|
DARK
|
0.533
|
Finally, we take the similarity with respect to Orphan
Black:
new_item <- item[1:4,6:7]
CS <- t(new_item) %*% user / sqrt( colSums(new_item^2) )*sqrt( sum(user^2) )
round(CS,2) %>%
kbl() %>%
kable_styling()
ORPHAN BLACK
|
0.22
|
HINTERLAND
|
0.13
|
Question 2
How similar are my preferences to
Hinterland? R: 0.21
Calculate the cosine similarity and report to 2 decimal points
(e.g. 0.12).
CS <- t(new_item) %*% user / sqrt( colSums(new_item^2) )*sqrt( sum(user^2) )
CS %>%
kbl() %>%
kable_styling()
ORPHAN BLACK
|
0.221
|
HINTERLAND
|
0.129
|
Question 3
What should Netflix recommend to me, based on the
content-based method?
## Orphan Black, because is closer to 1
Question 4
Now, let’s do the same exercise through the lens of collaborative
filtering (on users, not items).
row_names<-c("George", "Adam", "Ben", "Cam", "Dan")
col_names<-c("Sharp Obj", "Arrested Dev", "Arbitrage", "Margin C", "Bojack", "Orphan B", "Hinterland")
util <- matrix(c(4,3,4,5,3,NA,NA,
4,3,4,4,3,NA,NA,
3,4,3,1,3,5,NA,
4,4,4,4,4,2,4,
2,1,2,3,1,NA,3),
byrow = TRUE, nrow = 5, ncol = 7,
dimnames=list(row_names,col_names))
util %>%
kbl() %>%
kable_styling()
|
Sharp Obj
|
Arrested Dev
|
Arbitrage
|
Margin C
|
Bojack
|
Orphan B
|
Hinterland
|
George
|
4
|
3
|
4
|
5
|
3
|
NA
|
NA
|
Adam
|
4
|
3
|
4
|
4
|
3
|
NA
|
NA
|
Ben
|
3
|
4
|
3
|
1
|
3
|
5
|
NA
|
Cam
|
4
|
4
|
4
|
4
|
4
|
2
|
4
|
Dan
|
2
|
1
|
2
|
3
|
1
|
NA
|
3
|
Imagine we want to again make a recommendation as to which movie
George should see, based on his ratings and those of
Adam, Ben, Cam, and
Dan:
Who is most useful in predicting George’s rating for Orphan
Black?
We take the relevant users:
m <- cor( t(util), use="pairwise.complete.obs")
## Warning in cor(t(util), use = "pairwise.complete.obs"): the standard deviation is zero
# The relevant row
users_corr <- m[row=c("Adam","Ben","Cam", "Dan"), col=c("Adam")]
users_corr %>%
kbl() %>%
kable_styling()
|
x
|
Adam
|
1.000
|
Ben
|
-0.583
|
Cam
|
NA
|
Dan
|
0.873
|
We normalize the ratings:
util_n <- util - rowMeans(util, na.rm=TRUE) #normalize
# The Movies we care:
movies_corr <- util_n[row=c("Adam","Ben","Cam", "Dan"), col=c("Orphan B", "Hinterland")]
movies_corr %>%
kbl() %>%
kable_styling()
|
Orphan B
|
Hinterland
|
Adam
|
NA
|
NA
|
Ben
|
1.83
|
NA
|
Cam
|
-1.71
|
0.286
|
Dan
|
NA
|
1.000
|
## The most useful user for Orphan Black is Ben
Question 5
Who is most useful in predicting George’s rating for
Hinterland?
movies_corr %>%
kbl() %>%
kable_styling()
|
Orphan B
|
Hinterland
|
Adam
|
NA
|
NA
|
Ben
|
1.83
|
NA
|
Cam
|
-1.71
|
0.286
|
Dan
|
NA
|
1.000
|
## The most useful user for Hinterland is Dan
Question 6
According to the collaborative filtering method, should
Netflix better recommend Orphan Black or Hinterland?
Finally, we predict:
predm <- users_corr * movies_corr
predm %>%
kbl() %>%
kable_styling()
|
Orphan B
|
Hinterland
|
Adam
|
NA
|
NA
|
Ben
|
-1.07
|
NA
|
Cam
|
NA
|
NA
|
Dan
|
NA
|
0.873
|
# Take the average
pred <- colMeans(predm, na.rm=TRUE)
pred %>%
kbl() %>%
kable_styling()
|
x
|
Orphan B
|
-1.069
|
Hinterland
|
0.873
|
predm<-m[row=c("Ben","Dan"),col=c("George")]*util_n[row=c("Ben","Dan"),col=c("Orphan B", "Hinterland")]
predm %>%
kbl() %>%
kable_styling()
|
Orphan B
|
Hinterland
|
Ben
|
-1.6
|
NA
|
Dan
|
NA
|
1
|
# Take the average
pred <- colMeans(predm, na.rm=TRUE)
pred %>%
kbl() %>%
kable_styling()
|
x
|
Orphan B
|
-1.6
|
Hinterland
|
1.0
|
LS0tDQp0aXRsZTogIlF1aXogNTogUmVjb21lbmRhdGlvbiBTeXN0ZW1zIg0KYXV0aG9yOiAiRGFuaWVsIFJlZGVsIg0KZGF0ZTogIjIwMjMtMDEtMjUiDQpvdXRwdXQ6IA0KICBodG1sX2RvY3VtZW50Og0KICAgIHRvYzogVFJVRQ0KICAgIHRvY19mbG9hdDogVFJVRQ0KICAgIGNvZGVfZG93bmxvYWQ6IFRSVUUNCi0tLQ0KDQpgYGB7ciBzZXR1cCwgaW5jbHVkZT1GQUxTRX0NCmtuaXRyOjpvcHRzX2NodW5rJHNldChlY2hvID0gVFJVRSkNCm9wdGlvbnMoInNjaXBlbiI9IDIwMCwgImRpZ2l0cyI9Mywgd2lkdGggPSAxMDApDQpsaWJyYXJ5KGthYmxlRXh0cmEpDQpybShsaXN0PWxzKCkpDQpgYGANCg0KIyBRdWVzdGlvbiAxDQoNCkxldCdzIHVzZSB0aGUgWyoqY29udGVudC1iYXNlZCBtZXRob2QqKl17LnVuZGVybGluZX0gdG8gbWFrZSBzb21lIHJlY29tbWVuZGF0aW9ucy4gVGhlIGxhc3QgNSB0aGluZ3MgSSB3YXRjaGVkIG9uIE5ldGZsaXggYW5kIHRoZWlyIGF0dHJpYnV0ZXMgd2VyZToNCg0KYGBge3J9DQpyb3dfbmFtZXMgPC0gYygiRlVOTlkiLCAiUk9NQU5USUMiLCAiU1VTUEVOU0UiLCAiREFSSyIsICJSQVRJTkciKQ0KY29sX25hbWVzIDwtIGMoIlNIQVJQIE9CSkVDVFMiLCAiQVJSRVNURUQgREVWLiIsICJBUkJJVFJBR0UiLCAiTUFSR0lOIENBTEwiLCAiQk9KQUNLIiwgIk9SUEhBTiBCTEFDSyIsICJISU5URVJMQU5EIikNCg0KaXRlbSA8LSBtYXRyaXgoIGMoMCwgMSwgMCwgMSwgMSwgMSwgMCwNCiAgICAgICAgICAgICAgICAgIDEsIDEsIDAsIDAsIDEsIDAsIDAsDQogICAgICAgICAgICAgICAgICAxLCAxLCAxLCAwLCAxLCAwLCAxLA0KICAgICAgICAgICAgICAgICAgMSwgMCwgMSwgMSwgMCwgMSwgMSwNCiAgICAgICAgICAgICAgICAgIDQsIDMsIDQsIDUsIDMsIE5BLCBOQSksIGJ5cm93ID0gVFJVRSwgbnJvdyA9IDUsIG5jb2wgPSA3LA0KICAgICAgICAgICAgICAgIGRpbW5hbWVzID0gbGlzdChyb3dfbmFtZXMsIGNvbF9uYW1lcykpDQppdGVtICU+JSANCiAga2JsKCkgJT4lDQogIGthYmxlX3N0eWxpbmcoKQ0KYGBgDQoNCioqSG93IHNpbWlsYXIgYXJlIG15IHByZWZlcmVuY2VzIHRvIE9ycGhhbiBCbGFjaz8qKsKgWyoqKlI6IDAuMzYqKipdey51bmRlcmxpbmV9DQoNCkNhbGN1bGF0ZSB0aGUgY29zaW5lIHNpbWlsYXJpdHkgYW5kIHJlcG9ydCB0byAyIGRlY2ltYWwgcG9pbnRzIChlLmcuIDAuMTIpDQoNCkZpcnN0LCB3ZSBub3JtYWxpemUgdGhlIHJhdGluZ3MgdG8gdGhlIG1lYW46DQoNCmBgYHtyfQ0KcmF0aW5nIDwtIG1hdHJpeChjKDQsIDMsIDQsIDUsIDMpLCBucm93PTEsbmNvbD01KQ0KcmF0aW5nX20gPC0gcmF0aW5nLW1lYW4ocmF0aW5nKQ0KcmF0aW5nX20gJT4lIA0KICBrYmwoKSAlPiUNCiAga2FibGVfc3R5bGluZygpDQpgYGANCg0KV2Ugbm93IGVzdGltYXRlIGhvdyBtdWNoIHRoZSByYXRpbmdzIGNoYW5nZXMgd2l0aCBhdHRyaWJ1dGVzOg0KDQpgYGB7cn0NCnVzZXIgPC0gaXRlbVsxOjQsMTo1XSAlKiUgdChyYXRpbmdfbSkvcm93U3VtcyhpdGVtWzE6NCwxOjVdKQ0KdXNlciAlPiUgDQogIGtibCgpICU+JQ0KICBrYWJsZV9zdHlsaW5nKCkNCmBgYA0KDQpGaW5hbGx5LCB3ZSB0YWtlIHRoZSBzaW1pbGFyaXR5IHdpdGggcmVzcGVjdCB0byBbKipPcnBoYW4gQmxhY2sqKl17LnVuZGVybGluZX06DQoNCmBgYHtyfQ0KbmV3X2l0ZW0gPC0gaXRlbVsxOjQsNjo3XQ0KDQpDUyA8LSB0KG5ld19pdGVtKSAlKiUgdXNlciAvIHNxcnQoIGNvbFN1bXMobmV3X2l0ZW1eMikgKSpzcXJ0KCBzdW0odXNlcl4yKSApDQpyb3VuZChDUywyKSAlPiUgDQogIGtibCgpICU+JQ0KICBrYWJsZV9zdHlsaW5nKCkNCmBgYA0KDQojIFF1ZXN0aW9uIDINCg0KKipIb3cgc2ltaWxhciBhcmUgbXkgcHJlZmVyZW5jZXMgdG8gSGludGVybGFuZD8qKsKgWyoqKlI6IDAuMjEqKipdey51bmRlcmxpbmV9DQoNCipDYWxjdWxhdGUgdGhlIGNvc2luZSBzaW1pbGFyaXR5IGFuZCByZXBvcnQgdG8gMiBkZWNpbWFsIHBvaW50cyAoZS5nLiAwLjEyKS4qDQoNCmBgYHtyfQ0KQ1MgPC0gdChuZXdfaXRlbSkgJSolIHVzZXIgLyBzcXJ0KCBjb2xTdW1zKG5ld19pdGVtXjIpICkqc3FydCggc3VtKHVzZXJeMikgKQ0KQ1MgJT4lIA0KICBrYmwoKSAlPiUNCiAga2FibGVfc3R5bGluZygpDQpgYGANCg0KIyBRdWVzdGlvbiAzDQoNCioqV2hhdCBzaG91bGQgTmV0ZmxpeCByZWNvbW1lbmQgdG8gbWUsIGJhc2VkIG9uIHRoZSBjb250ZW50LWJhc2VkIG1ldGhvZD8qKg0KDQpgYGB7ciwgZWNobz1GQUxTRX0NCmNhdCgiT3JwaGFuIEJsYWNrLCBiZWNhdXNlIGlzIGNsb3NlciB0byAxIikNCmBgYA0KDQojIFF1ZXN0aW9uIDQNCg0KTm93LCBsZXQncyBkbyB0aGUgc2FtZSBleGVyY2lzZSB0aHJvdWdoIHRoZSBsZW5zIG9mIGNvbGxhYm9yYXRpdmUgZmlsdGVyaW5nIChvbiB1c2Vycywgbm90IGl0ZW1zKS4NCg0KYGBge3J9DQpyb3dfbmFtZXM8LWMoIkdlb3JnZSIsICJBZGFtIiwgIkJlbiIsICJDYW0iLCAiRGFuIikNCmNvbF9uYW1lczwtYygiU2hhcnAgT2JqIiwgIkFycmVzdGVkIERldiIsICJBcmJpdHJhZ2UiLCAiTWFyZ2luIEMiLCAiQm9qYWNrIiwgIk9ycGhhbiBCIiwgIkhpbnRlcmxhbmQiKQ0KDQp1dGlsIDwtIG1hdHJpeChjKDQsMyw0LDUsMyxOQSxOQSwNCiAgICAgICAgICAgICAgICAgNCwzLDQsNCwzLE5BLE5BLA0KICAgICAgICAgICAgICAgICAzLDQsMywxLDMsNSxOQSwNCiAgICAgICAgICAgICAgICAgNCw0LDQsNCw0LDIsNCwNCiAgICAgICAgICAgICAgICAgMiwxLDIsMywxLE5BLDMpLA0KICAgICAgICAgICAgICAgYnlyb3cgPSBUUlVFLCBucm93ID0gNSwgbmNvbCA9IDcsIA0KICAgICAgICAgICAgICAgZGltbmFtZXM9bGlzdChyb3dfbmFtZXMsY29sX25hbWVzKSkNCnV0aWwgJT4lIA0KICBrYmwoKSAlPiUNCiAga2FibGVfc3R5bGluZygpDQpgYGANCg0KSW1hZ2luZSB3ZSB3YW50IHRvIGFnYWluIG1ha2UgYSByZWNvbW1lbmRhdGlvbiBhcyB0byB3aGljaCBtb3ZpZSBHZW9yZ2Ugc2hvdWxkIHNlZSwgYmFzZWQgb24gaGlzIHJhdGluZ3MgYW5kIHRob3NlIG9mICoqQWRhbSoqLCAqKkJlbioqLCAqKkNhbSoqLCBhbmQgKipEYW4qKjoNCg0KKipXaG8gaXMgbW9zdCB1c2VmdWwgaW4gcHJlZGljdGluZyBHZW9yZ2UncyByYXRpbmcgZm9yIE9ycGhhbiBCbGFjaz8qKg0KDQpXZSB0YWtlIHRoZSByZWxldmFudCB1c2VyczoNCg0KYGBge3J9DQptIDwtIGNvciggdCh1dGlsKSwgdXNlPSJwYWlyd2lzZS5jb21wbGV0ZS5vYnMiKQ0KDQojIFRoZSByZWxldmFudCByb3cNCnVzZXJzX2NvcnIgPC0gbVtyb3c9YygiQWRhbSIsIkJlbiIsIkNhbSIsICJEYW4iKSwgY29sPWMoIkFkYW0iKV0NCnVzZXJzX2NvcnIgJT4lIA0KICBrYmwoKSAlPiUNCiAga2FibGVfc3R5bGluZygpDQoNCmBgYA0KDQpXZSBub3JtYWxpemUgdGhlIHJhdGluZ3M6DQoNCmBgYHtyfQ0KdXRpbF9uIDwtIHV0aWwgLSByb3dNZWFucyh1dGlsLCBuYS5ybT1UUlVFKSAjbm9ybWFsaXplDQoNCiMgVGhlIE1vdmllcyB3ZSBjYXJlOg0KbW92aWVzX2NvcnIgPC0gdXRpbF9uW3Jvdz1jKCJBZGFtIiwiQmVuIiwiQ2FtIiwgIkRhbiIpLCBjb2w9YygiT3JwaGFuIEIiLCAiSGludGVybGFuZCIpXQ0KbW92aWVzX2NvcnIgJT4lIA0KICBrYmwoKSAlPiUNCiAga2FibGVfc3R5bGluZygpDQoNCmBgYA0KDQpgYGB7ciwgZWNobz1GQUxTRX0NCmNhdCgiVGhlIG1vc3QgdXNlZnVsIHVzZXIgZm9yIE9ycGhhbiBCbGFjayBpcyBCZW4iKQ0KYGBgDQoNCiMgUXVlc3Rpb24gNQ0KDQoqKldobyBpcyBtb3N0IHVzZWZ1bCBpbiBwcmVkaWN0aW5nIEdlb3JnZSdzIHJhdGluZyBmb3IgSGludGVybGFuZD8qKg0KDQpgYGB7cn0NCm1vdmllc19jb3JyICU+JSANCiAga2JsKCkgJT4lDQogIGthYmxlX3N0eWxpbmcoKQ0KYGBgDQoNCmBgYHtyLCBlY2hvPUZBTFNFfQ0KY2F0KCJUaGUgbW9zdCB1c2VmdWwgdXNlciBmb3IgSGludGVybGFuZCBpcyBEYW4iKQ0KYGBgDQoNCiMgUXVlc3Rpb24gNg0KDQoqKkFjY29yZGluZyB0byB0aGUgY29sbGFib3JhdGl2ZSBmaWx0ZXJpbmcgbWV0aG9kLCBzaG91bGQgTmV0ZmxpeCBiZXR0ZXIgcmVjb21tZW5kIE9ycGhhbiBCbGFjayBvciBIaW50ZXJsYW5kPyoqDQoNCkZpbmFsbHksIHdlIHByZWRpY3Q6DQoNCmBgYHtyfQ0KcHJlZG0gPC0gdXNlcnNfY29yciAqIG1vdmllc19jb3JyDQpwcmVkbSAlPiUgDQogIGtibCgpICU+JQ0KICBrYWJsZV9zdHlsaW5nKCkNCiMgVGFrZSB0aGUgYXZlcmFnZQ0KcHJlZCA8LSBjb2xNZWFucyhwcmVkbSwgbmEucm09VFJVRSkNCnByZWQgJT4lIA0KICBrYmwoKSAlPiUNCiAga2FibGVfc3R5bGluZygpDQoNCmBgYA0KDQpgYGB7cn0NCnByZWRtPC1tW3Jvdz1jKCJCZW4iLCJEYW4iKSxjb2w9YygiR2VvcmdlIildKnV0aWxfbltyb3c9YygiQmVuIiwiRGFuIiksY29sPWMoIk9ycGhhbiBCIiwgIkhpbnRlcmxhbmQiKV0NCnByZWRtICU+JSANCiAga2JsKCkgJT4lDQogIGthYmxlX3N0eWxpbmcoKQ0KIyBUYWtlIHRoZSBhdmVyYWdlDQpwcmVkIDwtIGNvbE1lYW5zKHByZWRtLCBuYS5ybT1UUlVFKQ0KcHJlZCAlPiUgDQogIGtibCgpICU+JQ0KICBrYWJsZV9zdHlsaW5nKCkNCmBgYA0K