ONNX Runtime generate() C# API

Note: this API is in preview and is subject to change.

Overview

Model class

Constructor

public Model(string modelPath)

Generate method

public Sequences Generate(GeneratorParams generatorParams)

Tokenizer class

Constructor

public Tokenizer(Model model)

Encode method

public Sequences Encode(string str)

Encode batch method

public Sequences EncodeBatch(string[] strings)

Decode method

public string Decode(ReadOnlySpan<int> sequence)

Decode batch method

public string[] DecodeBatch(Sequences sequences)

Create stream method

public TokenizerStream CreateStream()

TokenizerStream class

Decode method

public string Decode(int token)

GeneratorParams class

Constructor

public GeneratorParams(Model model)

Set search option (double) method

public void SetSearchOption(string searchOption, double value)

Set search option (bool) method

public void SetSearchOption(string searchOption, bool value)

Try graph capture with max batch size method

public void TryGraphCaptureWithMaxBatchSize(int maxBatchSize)

Set input ids method

public void SetInputIDs(ReadOnlySpan<int> inputIDs, ulong sequenceLength, ulong batchSize)

Set input sequences method

public void SetInputSequences(Sequences sequences)

Set model input method

public void SetModelInput(string name, Tensor value)

Generator class

Constructor

public Generator(Model model, GeneratorParams generatorParams)

Is done method

public bool IsDone()

Compute logits method

public void ComputeLogits()

Generate next token method

public void GenerateNextToken()

Get sequence method

public ReadOnlySpan<int> GetSequence(ulong index)

Sequences class

Num sequences member

public ulong NumSequences { get { return _numSequences; } }

[] operator

public ReadOnlySpan<int> this[ulong sequenceIndex]