2. NEW Recommender#
A NEW recommender can be used individually in this way:
2.1. Raw text input and output#
System: Hello!<sep>
User: Hi. I like horror movies, such as <entity>The Shining (1980)</entity> and <entity>Annabelle (2014)</entity>.
Would you please recommend me some other movies?
['21 Bridges (2019)', 'The Conjuring (2013)', 'The Exorcist (1973)']
2.2. Tensor input and output#
# inputs
{'input_ids': tensor([[1, 2]]), 'attention_mask': tensor([[True, True]])}
# logits
tensor([[ 4.3385, 7.7007, 6.6780, -1.6603, -1.5623, 3.8379, 2.0713, 0.2687]], grad_fn=<SumBackward1>)
2.3. Implementation of NEW recommender#
2.3.1. Recommender Configuration: NEWRecConfig
from recwizard.configuration_utils import BaseConfig
class NEWRecConfig(BaseConfig):
"""Configuration class to sotre the
configuration of the NEW recommender."""
def __init__(self, n_items: int = None, dim: int = None, **kwargs):
super().__init__(**kwargs)
self.n_items = n_items
self.dim = dim
# use it!
config = NEWRecConfig(n_items=8, dim=10)
2.3.2. Recommender Tokenizer: NEWRecTokenizer
from typing import List
from recwizard.tokenizer_utils import BaseTokenizer
from recwizard.utility.utils import WrapSingleInput
class NEWRecTokenizer(BaseTokenizer):
"""Tokenizer class for the NEW recommender."""
@WrapSingleInput
def decode(self, ids, *args, **kwargs) -> List[str]:
"""Decode a list of token ids into a list of strings.
Args:
ids (List[int]): list of token ids to decode;
Returns:
List[str]: list of decoded strings;
"""
return [self.id2entity[id] for id in ids if id in self.id2entity]
def __call__(self, *args, **kwargs):
"""Tokenize a string into a list of token ids."""
kwargs.update(return_tensors="pt", padding=True, truncation=True)
return super().__call__(*args, **kwargs)
# use it!
tokenizer = NEWRecTokenizer(id2entity={
0: '21 Bridges (2019)',
1: 'The Shining (1980)',
2: 'Annabelle (2014)',
3: 'The Conjuring (2013)',
4: 'The Exorcist (1973)',
5: 'The Conjuring 2 (2016)',
6: 'The Nun (2018)',
7: 'X men (2019)',
})
3.3.3. Recommender Module: NewRec
import torch
from recwizard.module_utils import BaseModule
from transformers.utils import ModelOutput
class NEWRec(BaseModule):
"""NEW is a module that implements the NEW recommender."""
config_class = NEWRecConfig
tokenizer_class = NEWRecTokenizer
def __init__(self, config: NEWRecConfig, **kwargs):
super().__init__(config, **kwargs)
self.embeds = torch.nn.Embedding(config.n_items, config.dim)
def forward(self, input_ids, attention_mask=None):
"""Forward pass of the NEW recommender."""
embeds = self.embeds(input_ids)
avg_embeds = embeds.sum(dim=1) / (attention_mask.sum(dim=-1, keepdim=True) + 1e-8)
logits = (self.embeds.weight * avg_embeds.unsqueeze(1)).sum(dim=-1)
return ModelOutput({"rec_logits": logits})
@WrapSingleInput
def response(self, raw_input, tokenizer, return_dict=False, topk=3):
"""Generate response from the NEW recommender."""
# convert text input to tensor input
entities = tokenizer(raw_input)['entities'].to(self.device)
inputs = {
"input_ids": entities,
"attention_mask": entities != tokenizer.pad_entity_id,
}
# recommend top-k items
logits = self.forward(**inputs)["rec_logits"]
print(inputs, logits)
logits[torch.arange(logits.size(0)), entities] = float("-inf")
recommended = logits.topk(topk).indices.tolist()
output = tokenizer.batch_decode(recommended)
# return the output
if return_dict:
return {
"output": output,
"input": raw_input,
"recommended": recommended
}
return output
# use it!
model = NEWRec(config)
query = ('System: Hello!'
'<sep>User: Hi. I like horror movies, such as <entity>The Shining (1980)</entity> and <entity>Annabelle (2014)</entity>.'
'Would you please recommend me some other movies?'
)
resp = model.response(
raw_input=query,
tokenizer=tokenizer,
return_dict=True
)
The complete implementation is in examples/develop_model/new_recommender.py.