import json
import logging
import os
from typing import Union

from openai import OpenAI

from recwizard import BaseModule
from recwizard.modules.monitor import monitor

from .configuration_llm_rec import LLMRecConfig
from .tokenizer_chatgpt import ChatgptTokenizer

logger = logging.getLogger(__name__)


class ChatgptRec(BaseModule):
    """
    The recommender implemented based on OpenAI's GPT models.
    """

    config_class = LLMRecConfig
    tokenizer_class = ChatgptTokenizer

    def __init__(self, config: LLMRecConfig, prompt=None, model_name=None, debug=False, **kwargs):
        """
        Initializes the instance based on the config.

        Args:
            config (LLMRecConfig): The config.
            prompt (str, optional): A prompt that overrides the prompt from the config.
            model_name (str, optional): The name of the GPT model to use.
            debug (bool): Whether to log the messages sent to and received from the API.
        """
        super().__init__(config, **kwargs)
        self.model_name = config.model_name if model_name is None else model_name
        self.prompt = config.prompt if prompt is None else prompt
        self.debug = debug
        # The OpenAI client reads its API key from the OPENAI_API_KEY environment variable.
        self.client = OpenAI()

    @classmethod
    def from_pretrained(cls, pretrained_model_name_or_path, config=None, prompt=None, model_name=None):
        """
        Get an instance of this class.

        Args:
            pretrained_model_name_or_path (str): The name or path of the pretrained checkpoint.
            config (LLMRecConfig, optional): A config that overrides the one loaded from the checkpoint.
            prompt (str, optional): A prompt that overrides the prompt from the config.
            model_name (str, optional): The name of the GPT model to use.

        Returns:
            (ChatgptRec): the instance.
        """
        if config is None:
            config = cls.config_class.from_pretrained(pretrained_model_name_or_path)
        return cls(config, prompt=prompt, model_name=model_name)
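
    # A minimal usage sketch; the checkpoint name below is a placeholder, and the
    # OpenAI client expects OPENAI_API_KEY to be set in the environment:
    #
    #     rec = ChatgptRec.from_pretrained("path/to/checkpoint", model_name="gpt-3.5-turbo")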

    def save_pretrained(
        self,
        save_directory: Union[str, os.PathLike],
        push_to_hub: bool = False,
        **kwargs,
    ):
        """Save the config; this module keeps no local weights of its own."""
        self.config.save_pretrained(save_directory=save_directory, push_to_hub=push_to_hub)

    @classmethod
    def get_tokenizer(cls, **kwargs):
        """
        Get a tokenizer.

        Returns:
            (ChatgptTokenizer): the tokenizer.
        """
        return ChatgptTokenizer()
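
    # The tokenizer contract assumed by `response` below: calling the tokenizer on
    # the raw dialogue string must return a dict whose 'messages' entry is a list
    # of OpenAI-style chat messages, roughly:
    #
    #     ChatgptRec.get_tokenizer()("User: Hi!")["messages"]
    #     # -> [{"role": "user", "content": "Hi!"}, ...]
    #
    # (the exact parsing of roles and turns lives in ChatgptTokenizer).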

    @monitor
    def response(self, raw_input: str, tokenizer=None, topk=3, max_tokens=None, temperature=0.5,
                 model_name=None, return_dict=False, **kwargs):
        """
        Generate a response to the user's input.

        Args:
            raw_input (str): The raw dialogue input.
            tokenizer (BaseTokenizer, optional): A tokenizer that turns the raw input into chat messages.
            topk (int): The number of recommendations to return.
            max_tokens (int, optional): The maximum number of tokens for the ChatGPT API call.
            temperature (float): The sampling temperature for the ChatGPT API call.
            model_name (str, optional): The GPT model's name, overriding the instance default.
            return_dict (bool): Whether to return a dict (with inputs and links) or a plain list.

        Returns:
            list: The recommended items.
        """
        if topk == 0:
            return {'output': [], 'links': []} if return_dict else []
        if tokenizer is None:
            tokenizer = self.get_tokenizer()
        messages = tokenizer(raw_input)['messages']
        # The recommendation prompt goes in as the final message.
        messages.append(self.prompt)
        if self.debug:
            logger.info('input: %s', messages)
        res = self.client.chat.completions.create(
            model=model_name or self.model_name,
            max_tokens=max_tokens,
            messages=messages,
            temperature=temperature,
        ).choices[0].message.content
        if self.debug:
            logger.info('api result: %s', res)
        try:
            # `res` is expected to be a JSON list of {"name": ..., "uri": ...}
            # objects; any parsing or shape failure triggers the fallback below.
            answers = json.loads(res)
            if len(answers) < topk:
                raise ValueError('fewer answers than topk')
            output = [answer['name'] for answer in answers][:topk]
            links = [answer['uri'] for answer in answers][:topk]
        except (json.JSONDecodeError, KeyError, TypeError, ValueError):
            # Fall back to the backup prompt, which asks for a plain
            # comma-separated list of names (no links available).
            messages[-1] = self.config.backup_prompt
            res = self.client.chat.completions.create(
                model=model_name or self.model_name,
                max_tokens=max_tokens,
                messages=messages,
                temperature=temperature,
            ).choices[0].message.content
            output = res.split(',')[:topk]
            if len(output) < topk:
                output = output + [''] * (topk - len(output))
            links = [''] * topk
        if return_dict:
            return {
                "input": messages,
                "output": output,
                "links": {name: link for name, link in zip(output, links)},
            }
        else:
            return output
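

# A minimal end-to-end sketch, kept as a comment because the checkpoint name is a
# placeholder and the call needs a valid OPENAI_API_KEY:
#
#     rec = ChatgptRec.from_pretrained("path/to/checkpoint")
#     print(rec.response("User: Can you recommend some sci-fi movies?", topk=3))
#
# With return_dict=True, the result also includes the chat messages that were sent
# and a name -> uri mapping for the recommended items.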