RAG

Table of Contents

1. LLM construction

1.1. Encoder

1.2. Decoder

1.3. Encoder-Decoder

1.4. Autoencoder
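
The sketch below pairs a BERT encoder with a GPT-2 decoder as a text autoencoder: BERT compresses each transaction description into its [CLS] vector, a linear layer projects that vector into GPT-2's embedding space, and GPT-2 learns to reconstruct the original text. Texts that reconstruct poorly (high loss) are then flagged as anomalies.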

import torch
from torch import nn
from transformers import BertModel, GPT2LMHeadModel, BertTokenizer, GPT2Tokenizer



# Load BERT model and tokenizer
bert_model = BertModel.from_pretrained('bert-base-uncased')
bert_tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

# Load GPT-2 model and tokenizer
gpt2_model = GPT2LMHeadModel.from_pretrained('gpt2')
gpt2_tokenizer = GPT2Tokenizer.from_pretrained('gpt2')

# Ensure GPT-2 tokenizer uses a padding token
gpt2_tokenizer.pad_token = gpt2_tokenizer.eos_token


class BertGpt2Autoencoder(nn.Module):
    def __init__(self, bert_model, gpt2_model):
        super(BertGpt2Autoencoder, self).__init__()
        self.bert = bert_model
        self.gpt2 = gpt2_model
        self.latent_to_gpt2 = nn.Linear(bert_model.config.hidden_size, gpt2_model.config.n_embd)

        # Add device attribute
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        self.to(self.device)  # Move the model to the appropriate device

    def forward(self, input_ids, attention_mask, labels):
        # Encode input text using BERT
        encoder_outputs = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        latent_representation = encoder_outputs.last_hidden_state[:, 0, :]  # [CLS] token representation

        # Map latent representation to GPT-2 input size
        gpt2_input = self.latent_to_gpt2(latent_representation)
        gpt2_input = gpt2_input.unsqueeze(1).expand(-1, labels.size(1), -1)  # Expand to match label sequence length

        # Decode using GPT-2
        gpt2_outputs = self.gpt2(inputs_embeds=gpt2_input, labels=labels)

        return gpt2_outputs

def prepare_data(text, bert_tokenizer, gpt2_tokenizer):
    # Tokenize input text using BERT tokenizer
    bert_inputs = bert_tokenizer(text, return_tensors='pt', padding=True, truncation=True, max_length=64)

    # Tokenize output text using GPT-2 tokenizer
    gpt2_inputs = gpt2_tokenizer(text, return_tensors='pt', padding=True, truncation=True, max_length=64)

    return bert_inputs, gpt2_inputs

# Initialize the autoencoder
autoencoder = BertGpt2Autoencoder(bert_model, gpt2_model)
autoencoder = autoencoder.to(autoencoder.device)

# Define optimizer
optimizer = torch.optim.Adam(autoencoder.parameters(), lr=1e-5)

# Sample training data
texts = ["Payment for groceries", "Rent for April", "Monthly subscription fee", "Electricity bill payment"]

# Training loop
for epoch in range(200):  # Number of epochs
    epoch_loss = 0.0
    for text in texts:
        # Prepare data
        bert_inputs, gpt2_inputs = prepare_data(text, bert_tokenizer, gpt2_tokenizer)

        input_ids = bert_inputs['input_ids'].to(autoencoder.device)
        attention_mask = bert_inputs['attention_mask'].to(autoencoder.device)
        labels = gpt2_inputs['input_ids'].to(autoencoder.device)

        # Forward pass
        outputs = autoencoder(input_ids, attention_mask, labels)
        loss = outputs.loss  # Use the loss calculated by GPT-2

        # Backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        epoch_loss += loss.item()

    print(f"Epoch {epoch + 1}, Average loss: {epoch_loss / len(texts)}")



def detect_anomaly(text, autoencoder, bert_tokenizer, gpt2_tokenizer, threshold=5.0):
    autoencoder.eval()  # Disable dropout for deterministic inference
    bert_inputs, gpt2_inputs = prepare_data(text, bert_tokenizer, gpt2_tokenizer)
    input_ids = bert_inputs['input_ids'].to(autoencoder.device)
    attention_mask = bert_inputs['attention_mask'].to(autoencoder.device)
    labels = gpt2_inputs['input_ids'].to(autoencoder.device)

    # Forward pass without gradient tracking
    with torch.no_grad():
        outputs = autoencoder(input_ids, attention_mask, labels)

    loss = outputs.loss.item()  # Reconstruction loss from GPT-2

    # Anomaly detection based on threshold
    if loss > threshold:
        print(f"Anomaly detected! Loss: {loss}")
    else:
        print(f"Text is normal. Loss: {loss}")

# Example usage
detect_anomaly("Urgent transfer to offshore account", autoencoder, bert_tokenizer, gpt2_tokenizer)


2. RAG

2.1. Prompt engineering

2.2. GraphRAG

2.3. Knowledge base

2.4. Retraining

2.5. LoRA

2.6. Quantization

3. MCP

4. LLM hosting

4.1. Ollama

  • multiple threads

4.2. ChatOllama

5. ONNX conversion

6. Rig

Author: si

Created: 2025-02-06 Thu 13:42
