unit RAG.Core;

interface

uses
  System.Classes, System.SysUtils, System.IOUtils,
  Server.AiEngines.Core,
  RAG.Database;

type
  // Retrieval-augmented generation helper: splits documents into chunks,
  // obtains their embeddings from an IEmbedder, stores them through a
  // TMainDataModule, and builds prompts enriched with retrieved chunks.
  TRagSystem = class(TObject)
  private
    // Data module created in the constructor and freed in the destructor.
    FDatabase: TMainDataModule;
    // Embedding provider supplied by the caller of Create.
    FEmbedder: IEmbedder;
    // Embeds AQuery and asks the data module for up to ANumberOfChunks chunks
    // for the given domain.
    function GetRelevantDocsFromDB(const AQuery: string; ADomain: Integer; ANumberOfChunks: Integer): TArray<TDocumentChunk>;
    // Splits AContent into overlapping chunks, embeds them, and tags each one
    // with the file name of AFileName stripped of its extension.
    function GetChunks(const AContent,
      AFileName: string): TArray<TDocumentChunk>;
  public
    // Returns the prompt template filled with the retrieved context and AQuery.
    function GetAugmentedQuery(const AQuery: string; ADomain: Integer): string;
    // Reads the file AFileName, stores its content and chunks, assigns the
    // chunks to ADomain, and returns the id given by the data module.
    function AddDocument(const AFileName: string; ADomain: Integer): Integer;
    // Removes the document with id ADocumentId and its chunks.
    procedure DeleteDocument(ADocumentId: Integer);

    // AEmbedder is dereferenced during construction, so it must not be nil.
    constructor Create(AEmbedder: IEmbedder);
    destructor Destroy; override;
  end;

implementation

{ TRagSystem }

uses
  Dynamo.Core.ServiceLocator;

const
  // Number of chunks requested from the database for every augmented query.
  NumberOfChunks = 20;
  // Maximum number of characters in one document chunk.
  ChunkLength = 1000;
  // Number of characters shared by two consecutive chunks.
  ChunkOverlap = 50;

  // Prompt layout passed to Format: the first %s receives the retrieved
  // context, the second %s receives the user question.
  // NOTE(review): the text contains a non-ASCII character ("ò"); confirm the
  // source file is saved in an encoding the compiler decodes correctly
  // (e.g. UTF-8 with BOM).
  PromptTemplate = '''

Contesto: %s


Basandoti sul contesto fornito sopra, per favore rispondi alla seguente domanda:
%s

Se la risposta non può essere derivata dal contesto, per favore dillo chiaramente.
''';

// Splits AValue into chunks of at most AMaxLength characters. Each chunk
// starts (AMaxLength - AOverlap) characters after the previous one, so
// consecutive chunks share AOverlap characters.
//
// Returns an empty array for an empty string.
// Raises Exception when AMaxLength is below 100.
// Raises EArgumentOutOfRangeException when AOverlap is negative or not
// smaller than AMaxLength: a negative overlap would skip text between chunks
// and an overlap >= AMaxLength would never advance through the string.
function SplitStringByLength(const AValue: string; AMaxLength, AOverlap: Integer): TArray<string>;
var
  LIndex: Integer;
  LStep: Integer;
  LTotal: Integer;
  LCount: Integer;
  LCovered: Integer;
begin
  Result := [];
  if AMaxLength < 100 then
    raise Exception.Create('Chunk too small!');
  if (AOverlap < 0) or (AOverlap >= AMaxLength) then
    raise EArgumentOutOfRangeException.CreateFmt(
      'Chunk overlap (%d) must be between 0 and %d', [AOverlap, AMaxLength - 1]);

  LTotal := Length(AValue);
  if LTotal = 0 then
    Exit;

  LStep := AMaxLength - AOverlap;
  // Allocate once for the largest possible number of chunks (one per start
  // position inside the string) and trim afterwards.
  SetLength(Result, (LTotal - 1) div LStep + 1);
  LCount := 0;
  LCovered := 0;
  LIndex := 1;
  // A chunk that would start exactly on the last character is emitted only
  // when that character is not already part of the previous chunk; otherwise
  // a one-character input or a zero overlap would lose text.
  while (LIndex < LTotal) or ((LIndex = LTotal) and (LCovered < LTotal)) do
  begin
    Result[LCount] := Copy(AValue, LIndex, AMaxLength);
    Inc(LCount);
    LCovered := LIndex + AMaxLength - 1;
    // Advance so that the next chunk repeats the last AOverlap characters.
    Inc(LIndex, LStep);
  end;
  SetLength(Result, LCount);
end;

// Builds the chunk records for one document: splits AContent into
// overlapping pieces, requests one embedding per piece from the embedder and
// tags every chunk with the file name of AFileName without its extension.
//
// Raises Exception when the embedder returns fewer embeddings than chunks,
// instead of indexing past the end of the embedding array.
function TRagSystem.GetChunks(
  const AContent, AFileName: string): TArray<TDocumentChunk>;
var
  LChunksStr: TArray<string>;
  LChunksEmbedding: TArray<TArray<Extended>>;
  I: Integer;
  LTag: string;
begin
  LTag := ExtractFileName(ChangeFileExt(AFileName, ''));
  LChunksStr := SplitStringByLength(AContent, ChunkLength, ChunkOverlap);
  LChunksEmbedding := FEmbedder.GetEmbeddingFromStrings(LChunksStr);
  // The loop below reads LChunksEmbedding[I] for every chunk index, so a
  // shorter answer from the embedder must be rejected up front.
  if Length(LChunksEmbedding) < Length(LChunksStr) then
    raise Exception.CreateFmt(
      'Embedder returned %d embeddings for %d chunks',
      [Length(LChunksEmbedding), Length(LChunksStr)]);

  SetLength(Result, Length(LChunksStr));
  for I := Low(Result) to High(Result) do
  begin
    Result[I].Tag := LTag;
    Result[I].Content := LChunksStr[I];
    Result[I].Embedding := LChunksEmbedding[I];
  end;
end;

// Imports the file AFileName: reads its text, computes the embedded chunks,
// stores the document and its chunks through the data module and assigns the
// chunks to ADomain. Returns the id produced by FDatabase.AddDocument.
// The chunks are computed before anything is written to the database.
// NOTE(review): TFile.ReadAllText is called without an explicit encoding -
// confirm the input files are in an encoding it detects correctly.
function TRagSystem.AddDocument(const AFileName: string; ADomain: Integer): Integer;
var
  LText: string;
  LDocChunks: TArray<TDocumentChunk>;
begin
  LText := TFile.ReadAllText(AFileName);
  LDocChunks := GetChunks(LText, AFileName);

  Result := FDatabase.AddDocument(LText, '');
  FDatabase.SaveChunks(AFileName, LDocChunks, Result);
  FDatabase.UpdateChunksDomain(Result, ADomain);
end;

// Stores the embedder, creates the owned data module and copies the
// embedder's provider and embedding model onto it.
constructor TRagSystem.Create(AEmbedder: IEmbedder);
begin
  inherited Create;
  FEmbedder := AEmbedder;
  FDatabase := TMainDataModule.Create(nil);
  // Provider is assigned before ModelId, as in the original ordering.
  FDatabase.Provider := AEmbedder.Provider;
  FDatabase.ModelId := AEmbedder.EmbeddingModel;
end;

// Removes the document identified by ADocumentId and then its chunks, both
// through the data module.
// NOTE(review): the document is deleted before its chunks - confirm this
// order is compatible with any constraint between the two in the database.
procedure TRagSystem.DeleteDocument(ADocumentId: Integer);
begin
  FDatabase.DeleteDocument(ADocumentId);
  FDatabase.DeleteChunks(ADocumentId);
end;

// Releases the data module created by the constructor.
destructor TRagSystem.Destroy;
begin
  FreeAndNil(FDatabase);
  inherited Destroy;
end;

// Builds the prompt for AQuery: fetches up to NumberOfChunks chunks for
// ADomain, renders each one as "[tag] content" on its own line and inserts
// the resulting context together with the question into PromptTemplate.
function TRagSystem.GetAugmentedQuery(const AQuery: string; ADomain: Integer): string;
var
  LDocs: TArray<TDocumentChunk>;
  LContextText: string;
  LLine: string;
  I: Integer;
begin
  LDocs := GetRelevantDocsFromDB(AQuery, ADomain, NumberOfChunks);

  LContextText := '';
  for I := Low(LDocs) to High(LDocs) do
  begin
    LLine := Format('[%s] %s', [LDocs[I].Tag, LDocs[I].Content]);
    LContextText := LContextText + LLine + sLineBreak;
  end;

  Result := Format(PromptTemplate, [LContextText, AQuery]);
end;

// Embeds AQuery with the embedder and forwards the embedding, the domain and
// the requested chunk count to the data module, returning its result.
function TRagSystem.GetRelevantDocsFromDB(const AQuery: string; ADomain: Integer;
  ANumberOfChunks: Integer): TArray<TDocumentChunk>;
begin
  Result := FDatabase.GetRelevantDocsFromDB(
    FEmbedder.GetEmbeddingFromString(AQuery), ADomain, ANumberOfChunks);
end;

end.
