Source code for recwizard.modules.llm.modeling_llama_gen

import os
from typing import Union, List
from transformers import AutoModelForCausalLM


from recwizard import BaseModule
from recwizard.modules.monitor import monitor
from recwizard.utility import DeviceManager
from .configuration_llm import LLMConfig
from .tokenizer_llama import LlamaTokenizer
import logging

logger = logging.getLogger(__name__)


class LlamaGen(BaseModule):
    """
    The generator implemented on top of a Llama causal language model.
    """

    config_class = LLMConfig
    tokenizer_class = LlamaTokenizer
    def __init__(self, config: LLMConfig, prompt=None, model_name=None, debug=False, **kwargs):
        """
        Initializes the instance based on the config file.

        Args:
            config (LLMConfig): The config file.
            prompt (str, optional): A prompt to override the prompt from the config file.
            model_name (str, optional): The name of the model to load; defaults to the name in the config.
            debug (bool, optional): Whether to run in debug mode.
        """
        super().__init__(config, **kwargs)
        model_name = model_name or config.model_name
        self.model = AutoModelForCausalLM.from_pretrained(model_name)
        self.prompt = config.prompt if prompt is None else prompt
        self.debug = debug
    @classmethod
    def from_pretrained(cls, pretrained_model_name_or_path, config=None, prompt=None, model_name=None):
        """
        Get an instance of this class.

        Args:
            pretrained_model_name_or_path: The path or model hub id to load the config from.
            config (LLMConfig, optional): A config to use instead of loading one from
                ``pretrained_model_name_or_path``.
            prompt (str, optional): A prompt to override the prompt from the config file.
            model_name (str, optional): The name of the model to load.

        Returns:
            LlamaGen: the instance.
        """
        if config is None:
            config = cls.config_class.from_pretrained(pretrained_model_name_or_path)
        return cls(config, prompt=prompt, model_name=model_name)
    def save_pretrained(
        self,
        save_directory: Union[str, os.PathLike],
        push_to_hub: bool = False,
        **kwargs,
    ):
        """Save only the config; the underlying language model is reloaded by name at init time."""
        self.config.save_pretrained(save_directory=save_directory, push_to_hub=push_to_hub)
    @classmethod
    def get_tokenizer(cls, **kwargs):
        """
        Get a tokenizer.

        Returns:
            (LlamaTokenizer): the tokenizer.
        """
        return LlamaTokenizer()
    @monitor
    def response(
        self,
        raw_input,
        tokenizer,
        recs: List[str] = None,
        max_tokens=None,
        temperature=0.5,
        model_name=None,
        return_dict=False,
        **kwargs,
    ):
        """
        Generate a response to the processed user's input.

        Args:
            raw_input (str): The user's raw input.
            tokenizer (BaseTokenizer, optional): A tokenizer to process the raw input.
            recs (list, optional): The recommended movies.
            max_tokens (int): The maximum number of new tokens to generate.
            temperature (float): The temperature value used for generation.
            model_name (str, optional): The specified model's name (unused in this method).
            return_dict (bool): Whether to return a dict or a string.

        Returns:
            str: The response to the processed user's input.

        Llama chat format reference::

            <s>[INST] <<SYS>>
            {{ system_prompt }}
            <</SYS>>

            {{ user_msg_1 }} [/INST] {{ model_answer_1 }} </s><s>[INST] {{ user_msg_2 }} [/INST]
        """
        # Append the system prompt at the end; the last turn in `raw_input`
        # is assumed to be the user's message.
        prompt = self.prompt.copy()
        if recs is not None:
            formatted_movies = ", ".join(
                [f'{i + 1}. "{movie}"' for i, movie in enumerate(recs)]
            )
            prompt["content"] = prompt["content"].format(formatted_movies)

        raw_input += f"<<SYS>>{prompt['content']}<</SYS>>"
        encodings = tokenizer(raw_input, return_tensors="pt", padding=True)
        encodings = DeviceManager.copy_to_device(encodings, self.model.device)
        res = self.model.generate(
            **encodings,
            max_new_tokens=max_tokens,
            temperature=temperature,
            eos_token_id=tokenizer.eos_token_id,
        )
        decoded_text = tokenizer.decode(res[0], skip_special_tokens=True)
        resp_start = decoded_text.rfind("[/INST]") + len("[/INST]")
        resp = decoded_text[resp_start:].strip()
        output = "System: {}".format(resp)
        if return_dict:
            return {
                "input": raw_input,
                "output": output,
            }
        return output
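
A minimal usage sketch, not part of the module: the checkpoint id "recwizard/llama-gen-example" and the example dialogue are illustrative assumptions, not values taken from this source; the real repo id and prompt come from your own setup.

if __name__ == "__main__":
    # Load the config and module from a (hypothetical) pretrained checkpoint.
    gen = LlamaGen.from_pretrained("recwizard/llama-gen-example")  # placeholder repo id
    tokenizer = LlamaGen.get_tokenizer()

    # Dialogue in the Llama chat format; the trailing "[/INST]" marks where
    # the model's answer should start.
    raw_input = "<s>[INST] Can you recommend a sci-fi movie? [/INST]"

    reply = gen.response(
        raw_input,
        tokenizer=tokenizer,
        recs=["The Matrix", "Blade Runner"],
        max_tokens=128,
    )
    print(reply)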