RAG: Hybrid Search Optimization Notes — TF-IDF, BM25 & RRF

Table of Contents

1. Hybrid Search

1.1. TF-IDF (Term Frequency-Inverse Document Frequency)

1.1.1. Introduction

TF-IDF (Term Frequency-Inverse Document Frequency) is a widely-used weighting technique in information retrieval and text mining.

Its core purpose: Evaluate how important a word is to a document within a collection or corpus.

The core idea in one sentence: If a word appears frequently in a particular document (locally hot) but rarely across all other documents (globally rare), then that word best represents that document.

1.1.2. Core Formulas

TF-IDF is composed of two parts: TF (Term Frequency) and IDF (Inverse Document Frequency).

\[ TF\text{-}IDF = TF \times IDF \]

  1. TF (Term Frequency)

    Measures how often term \(t\) appears in document \(d\).

    • Intuition: The more frequently a word appears in a document, the more important it likely is to that document's content.
    • Formula: \[ TF(t,d) = \frac{\text{Number of times term } t \text{ appears in document } d}{\text{Total number of terms in document } d} \]
  2. IDF (Inverse Document Frequency)

    Measures how common or rare term \(t\) is across the entire corpus.

    • Intuition: If a word appears in many documents (e.g., "the", "is"), it has low discriminative power. Conversely, if a word is rare (e.g., "quantum mechanics"), its presence is highly informative.
    • Formula: \[ IDF(t) = \log \left( \frac{\text{Total number of documents in the corpus}}{\text{Number of documents containing term } t + 1} \right) \] Note: Adding 1 to the denominator is for smoothing, preventing division by zero.

1.1.3. Worked Example

To build intuition, assume a tiny corpus of just three documents:

  • Document A: "bee likes flower"
  • Document B: "I like bee"
  • Document C: "I like apple"
  1. Scenario 1: Importance of "bee" in Document A
    1. Calculate TF (local):

      • "bee" appears 1 time in Document A.
      • Total words in Document A = 3.

      \[ TF = \frac{1}{3} \approx 0.333 \]

    2. Calculate IDF (global):

      • Total documents in corpus = 3.
      • Documents containing "bee" = 2 (A and B).

      \[ IDF = \log\left(\frac{3}{2+1}\right) = \log(1) = 0 \] (With the +1 smoothing from the formula above, the IDF of "bee" drops to exactly 0; without smoothing it would be \(\log_{10}(3/2) \approx 0.176\). Either way, the more documents that contain a term, the closer its IDF gets to 0.)

  2. Scenario 2: Importance of "flower" in Document A
    1. Calculate TF: \[ TF = \frac{1}{3} \approx 0.333 \]
    2. Calculate IDF:

      • "flower" appears only in Document A (1 document).

      \[ IDF = \log\left(\frac{3}{1+1}\right) = \log(1.5) \approx 0.176 \] (base 10, with the same +1 smoothing as above)

    3. Final TF-IDF: \[ TF\text{-}IDF = 0.333 \times 0.176 \approx 0.059 \]
  3. Conclusions from the Example
    • "flower" (≈ 0.059) scores higher than "bee" (0).
    • "like" appears in two of the three documents ("likes" in Document A is a distinct token here; a stemmer would merge them), so with smoothing its IDF is \(\log(3/3) = 0\): it scores 0 just like "bee".
    • This matches our expectation: "flower" is a unique feature of Document A, and therefore its most representative term.
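The arithmetic above can be checked in a few lines of Python. This is a minimal sketch of the two formulas, using the smoothed base-10 IDF \(\log(N / (df + 1))\) defined earlier; the corpus and term names come straight from the example.

```python
import math

# Toy corpus from the worked example (lowercased, whitespace-tokenized)
corpus = {
    "A": "bee likes flower".split(),
    "B": "i like bee".split(),
    "C": "i like apple".split(),
}
N = len(corpus)

def tf(term, doc):
    # TF(t, d) = occurrences of t in d / total terms in d
    return doc.count(term) / len(doc)

def idf(term):
    # Smoothed IDF(t) = log10(N / (df(t) + 1))
    df = sum(term in doc for doc in corpus.values())
    return math.log10(N / (df + 1))

doc_a = corpus["A"]
print(round(tf("bee", doc_a) * idf("bee"), 3))        # 0.0   ("bee" is in 2 of 3 docs)
print(round(tf("flower", doc_a) * idf("flower"), 3))  # 0.059 ("flower" only in A)
```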

1.1.4. Core Philosophy: The Tension Between Local and Global

The essence of TF-IDF is solving one problem: How to find the balance between "local importance" and "global commonality"?

  1. Philosophical Breakdown
    | Concept            | Metric   | Perspective       | Subtext                                                                              |
    |--------------------+----------+-------------------+--------------------------------------------------------------------------------------|
    | Specific document  | TF       | Local             | "In this small environment, this word appears so many times; it must be important."   |
    | Global observation | IDF      | Global            | "Wait, let me check whether this word is everywhere in other documents. If it's a commodity, local frequency is useless." |
    | Importance result  | TF × IDF | Weighted judgment | Only words that are locally hot and globally rare are the true "local specialties."   |
  2. Analogy: Finding a Regional Specialty
    • Bottled water: Abundant in one store (high TF), but available in every convenience store nationwide (low IDF) → NOT a specialty.
    • Yak butter tea: Abundant in one store (high TF), but rarely seen in other regions (high IDF) → IS a specialty.

1.1.5. Advantages and Disadvantages

  1. Advantages
    1. Simple and effective: Clear logic, low computational cost, easy to implement.
    2. Automatic filtering: Automatically down-weights stop words (e.g., "the", "is", "a") without manual intervention.
    3. Strong baseline model: An excellent baseline for NLP tasks.
  2. Disadvantages
    1. Ignores word order: It is a "Bag of Words" model — cannot distinguish between "cat chases mouse" and "mouse chases cat".
    2. Ignores semantics: Cannot recognize synonyms (e.g., "computer" and "PC" are treated as entirely different words).
    3. Data sparsity: In large-scale corpora, the term-document matrix becomes extremely sparse.
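Point 1 is easy to demonstrate: a bag-of-words model reduces a sentence to its term counts, so the two sentences below become indistinguishable. A standard-library sketch:

```python
from collections import Counter

def bow(sentence: str) -> Counter:
    # A bag-of-words representation keeps only term counts, discarding order
    return Counter(sentence.lower().split())

print(bow("cat chases mouse") == bow("mouse chases cat"))  # True
```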

1.1.6. Code Example (Python)

Using the scikit-learn library for quick TF-IDF computation:

from sklearn.feature_extraction.text import TfidfVectorizer

# Corpus
corpus = [
    'bee likes flower',
    'I like bee',
    'I like apple',
]

# Initialize and compute
vectorizer = TfidfVectorizer()
X = vectorizer.fit_transform(corpus)

# Print feature words and corresponding TF-IDF matrix
print(vectorizer.get_feature_names_out())
print(X.toarray())

1.2. BM25 (Best Matching 25)

1.2.1. Introduction

BM25 is a probabilistic refinement of TF-IDF and the current gold standard for keyword-based retrieval (Keyword Search). It primarily addresses two problems with TF-IDF:

  1. Unbounded term frequency: TF-IDF allows term frequency to increase scores linearly without limit.
  2. No document length normalization: TF-IDF does not account for varying document lengths.

1.2.2. Core Formula

\[Score(D, Q) = \sum_{i=1}^{n} IDF(q_i) \cdot \frac{f(q_i, D) \cdot (k_1 + 1)}{f(q_i, D) + k_1 \cdot (1 - b + b \cdot \frac{|D|}{avgdl})}\]

The formula is composed of three parts:

  1. IDF (Inverse Document Frequency): Measures the rarity (weight) of a term.

    \[IDF(q_i) = \ln \frac{N - n(q_i) + 0.5}{n(q_i) + 0.5} + 1\]

    • \(N\): total number of documents in the corpus
    • \(n(q_i)\): number of documents containing \(q_i\)
    • The 0.5 smoothing terms prevent the denominator from reaching zero and keep the IDF stable even for very common words; the trailing +1 guarantees the value stays non-negative.
  2. TF Component (Term Frequency): Measures how often a term appears in a document (with a saturation mechanism).
  3. Length Normalization: Adjusts the score based on document length.
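The three parts compose into a per-term contribution. Below is a direct transcription of the formula as a sketch; the function and argument names are mine, not from any particular library.

```python
import math

def bm25_term_score(f, df, N, dl, avgdl, k1=1.5, b=0.75):
    """Contribution of one query term to BM25(D, Q).

    f: occurrences of the term in D, df: documents containing it,
    N: corpus size, dl: |D|, avgdl: average document length.
    """
    idf = math.log((N - df + 0.5) / (df + 0.5)) + 1
    tf_part = f * (k1 + 1) / (f + k1 * (1 - b + b * dl / avgdl))
    return idf * tf_part

# A term occurring twice in an average-length document, present in 10 of 1000 docs:
print(round(bm25_term_score(f=2, df=10, N=1000, dl=100, avgdl=100.0), 2))  # 7.92
```

Note that with `dl == avgdl` the length factor reduces to 1, so only the IDF and the saturated TF part remain.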

1.2.3. Key Improvements over TF-IDF

  1. A. Term Frequency Saturation
    • TF-IDF: Term frequency increases score linearly without limit (easy to game, unreasonable).
    • BM25: Introduces diminishing returns. After term frequency reaches a certain level, the score saturates (upper bound is \(k_1 + 1\)).

    The key insight: the 10th occurrence of a word in a document adds far less information than the 1st occurrence. BM25 models this reality; TF-IDF does not.

  2. B. Document Length Normalization
    • Problem: Longer documents naturally contain more words, giving them an unfair advantage.
    • BM25: Penalizes long documents (\(|D| > avgdl\)) and rewards shorter documents.

    This means a concise, focused document matching a query is boosted relative to a lengthy document that happens to mention the query term among thousands of other words.
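Improvement A can be made concrete. Holding \(|D| = avgdl\) so the length factor is 1, the TF component \(\frac{f \cdot (k_1 + 1)}{f + k_1}\) climbs toward its ceiling of \(k_1 + 1\) with sharply diminishing returns:

```python
k1 = 1.5  # ceiling of the TF component is k1 + 1 = 2.5

for f in (1, 2, 5, 10, 100):
    # TF component with |D| = avgdl, so the length factor is 1
    print(f, round(f * (k1 + 1) / (f + k1), 3))
```

Going from 1 to 2 occurrences adds about 0.43 to the component; going from 10 all the way to 100 adds less than 0.3.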

1.2.4. The Two Key Parameters

| Parameter | Range     | Role                             | Explanation                                                                                                  |
|-----------+-----------+----------------------------------+--------------------------------------------------------------------------------------------------------------|
| \(k_1\)   | 1.2 - 2.0 | Controls TF saturation speed     | Smaller values = faster saturation (repeated words matter less); larger values = more linear, closer to raw TF. |
| \(b\)     | 0 - 1     | Controls length-penalty strength | Default: 0.75. \(b = 1\) = full length penalty; \(b = 0\) = no length penalty at all.                          |
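The role of \(b\) can be illustrated by scoring the same term frequency in a document twice the average length (illustrative numbers, with \(k_1 = 1.5\)):

```python
k1, f = 1.5, 3
ratio = 2.0  # |D| / avgdl: a document twice the average length

for b in (0.0, 0.75, 1.0):
    # Larger b -> larger denominator -> stronger penalty for the long document
    denom = f + k1 * (1 - b + b * ratio)
    print(b, round(f * (k1 + 1) / denom, 3))
```

At \(b = 0\) the long document is not penalized at all; at \(b = 1\) it takes the full penalty, and the default 0.75 sits in between.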

1.3. RRF (Reciprocal Rank Fusion)

1.3.1. Core Concept

In a RAG system, any single retrieval method has blind spots. Hybrid retrieval improves accuracy by combining two complementary search strategies.

| Retrieval type          | Representative algorithm | Strengths                                                  | Weaknesses                                                       |
|-------------------------+--------------------------+------------------------------------------------------------+------------------------------------------------------------------|
| Keyword search (sparse) | BM25                     | Exact matching: proper nouns, abbreviations, IDs           | Cannot understand semantics; synonyms fail                       |
| Vector search (dense)   | Cosine similarity        | Semantic understanding, contextual relevance, fuzzy queries | Insensitive to precise details; can surface loosely related matches |

1.3.2. RRF (Reciprocal Rank Fusion) Principle

RRF is a fusion algorithm based on rank rather than score. It sidesteps the problem that different retrievers produce scores on incompatible scales (e.g., BM25 scores are unbounded while vector similarities are bounded), which makes direct weighted combination fragile.

  1. Formula

    For the final score of document \(d\):

    \[RRFscore(d) = \sum_{r \in R} \frac{1}{k + r(d)}\]

    • \(R\): The set of all retrieval result lists.
    • \(r(d)\): The rank of document \(d\) in a given list (Rank 1 = first place).
    • \(k\): Smoothing constant (typically set to 60), used to prevent top-ranked documents from having disproportionately large weight.
  2. Workflow
    1. Parallel Retrieval: Execute BM25 and Vector Search simultaneously.
    2. Obtain Rankings: Get two separate Top-K document lists.
    3. Fusion Calculation: Ignore original scores; compute RRF Score based solely on ranks.
    4. Re-ranking: Sort by RRF Score in descending order; take the Top-N results to feed to the LLM.
  3. Worked Example

    Suppose we search for "quantum computing applications":

    | Document | BM25 rank | Vector rank | RRF score (k = 60)      |
    |----------+-----------+-------------+-------------------------|
    | Doc X    | 3         | 1           | 1/63 + 1/61 ≈ 0.03227   |
    | Doc Y    | 1         | 5           | 1/61 + 1/65 ≈ 0.03178   |
    | Doc Z    | 2         | 4           | 1/62 + 1/64 ≈ 0.03175   |

    Doc X wins because it ranks near the top of both lists: consistent placement across both retrievers beats Doc Y's single first place.
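The calculation can be reproduced with a short sketch. The two rank lists mirror the example above; "P" and "Q" are hypothetical filler documents occupying vector ranks 2 and 3.

```python
def rrf(rank_lists, k=60):
    # score(d) = sum over lists of 1 / (k + rank(d)), ranks starting at 1
    scores = {}
    for ranking in rank_lists:
        for rank, doc_id in enumerate(ranking, start=1):
            scores[doc_id] = scores.get(doc_id, 0.0) + 1.0 / (k + rank)
    return scores

bm25_ranking = ["Y", "Z", "X"]              # Doc Y ranked 1st, Z 2nd, X 3rd
vector_ranking = ["X", "P", "Q", "Z", "Y"]  # Doc X 1st, Z 4th, Y 5th

fused = rrf([bm25_ranking, vector_ranking])
for doc_id in ("X", "Y", "Z"):
    print(doc_id, round(fused[doc_id], 5))  # X 0.03227, Y 0.03178, Z 0.03175
```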

1.3.3. Core Advantages of RRF

  1. Normalization-Free: No need for complex weight tuning (e.g., \(0.7 \times A + 0.3 \times B\)). Extremely robust.
  2. Complementary strengths: Captures both "literally exact" matches (IDs, model numbers) and "semantically related" concepts.
  3. Cold-start friendly: For new terms not seen in training data, BM25 serves as a fallback since it relies on lexical matching rather than learned embeddings.

1.4. Summary: How It All Fits Together in RAG

The retrieval pipeline in a RAG system:

  1. User Query arrives.
  2. BM25 (sparse/keyword search) retrieves documents based on exact term matching, powered by TF-IDF principles with saturation and length normalization improvements.
  3. Vector Search (dense/semantic search) retrieves documents based on embedding similarity, capturing meaning beyond exact words.
  4. RRF fuses both result lists by rank, producing a single unified ranking that leverages the strengths of both approaches.
  5. The Top-N documents are passed as context to the LLM for answer generation.

1.5. Key Takeaways

  • TF-IDF is the foundation: locally frequent + globally rare = important. Simple but ignores word order and semantics.
  • BM25 improves on TF-IDF with term frequency saturation (\(k_1\)) and document length normalization (\(b\)). It is the gold standard for keyword search.
  • RRF is the fusion layer: it elegantly combines keyword and vector search results using only rank positions, avoiding the score normalization problem entirely.
  • Hybrid Search = BM25 + Vector Search + RRF: This combination is the current best practice for RAG retrieval, giving you both precision (exact matches) and recall (semantic understanding).

1.6. Implementation

"""
From-scratch implementation of hybrid search: BM25 + Vector Search + RRF.
Requires an OPENAI_API_KEY environment variable.

All scoring formulas follow RAG.org:
  - TF(t,d) = count(t,d) / |d|
  - IDF(q)  = ln((N - n(q) + 0.5) / (n(q) + 0.5)) + 1        [BM25 variant]
  - BM25    = Σ IDF(qi) * f(qi,D)*(k1+1) / (f(qi,D) + k1*(1 - b + b*|D|/avgdl))
  - RRF     = Σ 1/(k + rank(d))
"""

import math
import re
from collections import defaultdict
from dataclasses import dataclass, field
from typing import Any

import openai
from qdrant_client import QdrantClient

# ---------------------------------------------------------------------------
# Tokenizer
# ---------------------------------------------------------------------------

def tokenize(text: str) -> list[str]:
    """Lowercase and extract alphanumeric tokens."""
    return re.findall(r"[a-z0-9]+", text.lower())


# ---------------------------------------------------------------------------
# Corpus loader
# ---------------------------------------------------------------------------

@dataclass
class Document:
    id: str
    tokens: list[str]
    payload: dict[str, Any] = field(default_factory=dict)


def load_corpus(
    collection_name: str = "knowledge_base",
    host: str = "localhost",
    port: int = 6333,
) -> list[Document]:
    """Scroll all points from Qdrant and build an in-memory corpus."""
    client = QdrantClient(host=host, port=port)
    documents: list[Document] = []
    offset = None

    while True:
        results, next_offset = client.scroll(
            collection_name=collection_name,
            offset=offset,
            limit=256,
            with_payload=True,
            with_vectors=False,
        )
        for point in results:
            text = (point.payload or {}).get("text", "")
            documents.append(Document(
                id=str(point.id),
                tokens=tokenize(text),
                payload=point.payload or {},
            ))
        if next_offset is None:
            break
        offset = next_offset
     
    return documents


# ---------------------------------------------------------------------------
# BM25 scoring (from scratch)
# ---------------------------------------------------------------------------

def bm25_search(
    query: str,
    corpus: list[Document],
    limit: int = 15,
    k1: float = 1.5,
    b: float = 0.75,
) -> list[tuple[Document, float]]:
    """
    Score every document against the query using BM25.

    BM25(D, Q) = Σ IDF(qi) * f(qi,D)*(k1+1) / (f(qi,D) + k1*(1-b + b*|D|/avgdl))

    IDF(qi) = ln((N - n(qi) + 0.5) / (n(qi) + 0.5)) + 1
    """
    query_tokens = tokenize(query)
    if not query_tokens or not corpus:
        return []
     
    N = len(corpus)
    avgdl = sum(len(doc.tokens) for doc in corpus) / N

    # Document frequency: how many docs contain each term
    df: dict[str, int] = defaultdict(int)
    for doc in corpus:
        seen = set(doc.tokens)
        for token in seen:
            df[token] += 1
         
    # Precompute IDF for query terms
    idf: dict[str, float] = {}
    for qt in query_tokens:
        n_q = df.get(qt, 0)
        idf[qt] = math.log((N - n_q + 0.5) / (n_q + 0.5)) + 1
     
    # Score each document
    scored: list[tuple[Document, float]] = []
    for doc in corpus:
        score = 0.0
        doc_len = len(doc.tokens)
        # Term frequency map for this document
        tf: dict[str, int] = defaultdict(int)
        for t in doc.tokens:
            tf[t] += 1
         
        for qt in query_tokens:
            f_q = tf.get(qt, 0)
            if f_q == 0:
                continue
            numerator = f_q * (k1 + 1)
            denominator = f_q + k1 * (1 - b + b * doc_len / avgdl)
            score += idf[qt] * numerator / denominator
         
        if score > 0:
            scored.append((doc, score))
         
    scored.sort(key=lambda x: x[1], reverse=True)
    return scored[:limit]


# ---------------------------------------------------------------------------
# Vector search (via Qdrant)
# ---------------------------------------------------------------------------

@dataclass
class VectorResult:
    id: str
    score: float
    payload: dict[str, Any] = field(default_factory=dict)


def vector_search(
    query: str,
    collection_name: str = "knowledge_base",
    host: str = "localhost",
    port: int = 6333,
    limit: int = 15,
) -> list[VectorResult]:
    """Generate an OpenAI embedding and query Qdrant for nearest neighbors."""
    embedding = openai.OpenAI().embeddings.create(
        model="text-embedding-3-small", input=query
    ).data[0].embedding

    client = QdrantClient(host=host, port=port)
    response = client.query_points(
        collection_name=collection_name,
        query=embedding,
        limit=limit,
        with_payload=True,
    )
    points = response.points if hasattr(response, "points") else response

    return [
        VectorResult(id=str(p.id), score=p.score, payload=p.payload or {})
        for p in points
    ]


# ---------------------------------------------------------------------------
# RRF fusion
# ---------------------------------------------------------------------------

@dataclass
class SearchResult:
    id: str
    score: float
    payload: dict[str, Any] = field(default_factory=dict)


def rrf_fuse(
    bm25_results: list[tuple[Document, float]],
    vec_results: list[VectorResult],
    limit: int = 5,
    k: int = 60,
) -> list[SearchResult]:
    """
    Reciprocal Rank Fusion: score(d) = Σ 1/(k + rank(d))
    """
    rrf_scores: dict[str, float] = defaultdict(float)
    payloads: dict[str, dict[str, Any]] = {}

    for rank, (doc, _bm25_score) in enumerate(bm25_results, start=1):
        rrf_scores[doc.id] += 1.0 / (k + rank)
        payloads.setdefault(doc.id, doc.payload)
     
    for rank, vr in enumerate(vec_results, start=1):
        rrf_scores[vr.id] += 1.0 / (k + rank)
        payloads.setdefault(vr.id, vr.payload)
     
    sorted_ids = sorted(rrf_scores.items(), key=lambda x: x[1], reverse=True)[:limit]

    return [
        SearchResult(id=pid, score=score, payload=payloads[pid])
        for pid, score in sorted_ids
    ]


# ---------------------------------------------------------------------------
# Entry point: hybrid search
# ---------------------------------------------------------------------------

def hybrid_search(
    query: str,
    collection_name: str = "knowledge_base",
    host: str = "localhost",
    port: int = 6333,
    limit: int = 5,
    bm25_k1: float = 1.5,
    bm25_b: float = 0.75,
    rrf_k: int = 60,
) -> list[SearchResult]:
    """
    Full hybrid search pipeline:
      1. Load corpus from Qdrant
      2. BM25 search (from scratch)
      3. Vector search (via Qdrant + OpenAI embeddings)
      4. RRF fusion
    """
    prefetch = limit * 3

    # 1. Build BM25 corpus
    corpus = load_corpus(collection_name, host, port)

    # 2. BM25 search
    bm25_results = bm25_search(query, corpus, limit=prefetch, k1=bm25_k1, b=bm25_b)

    # 3. Vector search
    vec_results = vector_search(query, collection_name, host, port, limit=prefetch)

    # 4. RRF fusion
    return rrf_fuse(bm25_results, vec_results, limit=limit, k=rrf_k)


def test_machine_learning_notes_exist():
    """Query 'machine learning notes' and verify relevant results are returned."""
    results = hybrid_search("machine learning notes", limit=5)

    # At least one result should come back
    assert len(results) > 0, "Expected at least one result for 'machine learning notes'"

    # All RRF scores should be positive
    for r in results:
        assert r.score > 0, f"RRF score should be positive, got {r.score}"
     
    # At least one result should mention machine learning in its text payload
    texts = [r.payload.get("text", "").lower() for r in results]
    assert any(
        "machine learning" in t for t in texts
    ), f"No result contained 'machine learning'. Top texts: {[t[:80] for t in texts]}"

    # Print results for visibility
    for i, r in enumerate(results, 1):
        snippet = r.payload.get("text", "")[:120].replace("\n", " ")
        print(f"  #{i}  rrf_score={r.score:.5f}  id={r.id}")
        print(f"       source={r.payload.get('source_file', 'N/A')}")
        print(f"       text={snippet}...")
     
     
def main():
    print("Hello from rrf_from_scratch!")
    test_machine_learning_notes_exist()
    print("Test passed!")


if __name__ == "__main__":
    main()


Date: 2026-02-10 Tue. 00:00

Author: Silin Zhao

Created: 2026-02-10 Tue. 20:21
