11. Performance Engineering и Profiling
BenchmarkDotNet
Введение в BenchmarkDotNet
BenchmarkDotNet — фреймворк для микро-бенчмаркинга в .NET, который обеспечивает точные, воспроизводимые измерения производительности кода.
Почему не Stopwatch?
// BAD — a hand-rolled Stopwatch loop: the first iterations include JIT compilation,
// a GC may run at any point, and no statistics are computed over the samples.
var sw = Stopwatch.StartNew();
for (int i = 0; i < 1000; i++)
{
    DoWork();
}
Console.WriteLine(sw.Elapsed);

// GOOD — BenchmarkDotNet. The job attribute is applied to the benchmark class and the
// measured method is marked with [Benchmark]. RunStrategy.Throughput runs warm-up
// iterations before measuring, so JIT compilation stays out of the reported numbers.
[SimpleJob(RunStrategy.Throughput, warmupCount: 3, iterationCount: 5)]
public class MyBenchmarks
{
    [Benchmark]
    public void MyBenchmark() => DoWork();
}
Проблемы ручного бенчмаркинга:
- JIT компиляция выполняется во время замера
- GC может запуститься в любой момент
- CPU frequency scaling влияет на результаты
- Нет статистического анализа (среднее, медиана, outliers)
- Нет сравнения версий фреймворков
Архитектура BenchmarkDotNet
Benchmark Method
↓
JIT Compilation (warming phase)
↓
Measurement Phase (multiple runs)
↓
Hardware Counters (optional)
↓
Statistical Analysis
↓
Report Generation (Markdown, HTML, CSV)Базовый Benchmark
Минимальный пример
using BenchmarkDotNet.Attributes;
using BenchmarkDotNet.Running;
/// <summary>
/// Compares several ways of building one space-separated string from 10 000 words.
/// </summary>
public class StringManipulationBenchmarks
{
    private string _data = string.Empty;

    // Not readonly: the field is assigned in Setup(), and a readonly field may only be
    // assigned in a constructor or a field initializer (CS0191).
    private List<string> _words = new();

    /// <summary>Builds the input data once, outside of the measured code.</summary>
    [GlobalSetup]
    public void Setup()
    {
        _data = string.Join(" ", Enumerable.Range(1, 10000).Select(i => $"word{i}"));
        _words = _data.Split(' ').ToList();
    }

    /// <summary>Repeated <c>+=</c>: every step allocates a new, longer string.</summary>
    [Benchmark]
    public string StringConcat()
    {
        var result = "";
        foreach (var word in _words)
        {
            result += word + " ";
        }

        return result.Trim();
    }

    /// <summary>Appends into a single <see cref="System.Text.StringBuilder"/>.</summary>
    [Benchmark]
    public string StringBuilder()
    {
        var sb = new StringBuilder();
        foreach (var word in _words)
        {
            sb.Append(word).Append(' ');
        }

        return sb.ToString().Trim();
    }

    /// <summary>Lets <c>string.Join</c> compute the result in one call.</summary>
    [Benchmark]
    public string Join() => string.Join(" ", _words);

    // NOTE(review): the body is identical to Join(), so both rows will report the same
    // numbers — replace it with a genuinely different approach or drop it.
    [Benchmark]
    public string StringJoinOperator() => string.Join(" ", _words);
}
public class Program
{
public static void Main(string[] args)
{
var summary = BenchmarkRunner.Run<StringManipulationBenchmarks>();
}
}Установка
<!-- .csproj -->
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<OutputType>Exe</OutputType>
<TargetFramework>net9.0</TargetFramework>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
</PropertyGroup>
<ItemGroup>
<PackageReference Include="BenchmarkDotNet" Version="0.14.0" />
</ItemGroup>
</Project>dotnet restore
dotnet run -c Release --project Benchmarks.csprojJob Configuration
RunStrategy
// Job using RunStrategy.ColdStart with warmupCount 3, iterationCount 5 and launchCount 2.
[SimpleJob(RunStrategy.ColdStart,
warmupCount: 3,
iterationCount: 5,
launchCount: 2)]
public class ColdStartBenchmarks
{
// Each benchmark is launched in a new process
// Minimal warm-up — suitable for measuring startup time
}
// There is no [ThroughputJob] attribute; the strategy is selected through SimpleJob.
// RunStrategy.Throughput is BenchmarkDotNet's default strategy.
[SimpleJob(RunStrategy.Throughput)]
public class ThroughputBenchmarks
{
    // Steady-state measurement of throughput (ops/sec)
    // Warm-up iterations run first so the measured results are stable
}
// There is no [MetricJob] attribute. RunStrategy.Monitoring is the strategy meant for
// longer-running (macro) benchmarks where every iteration is measured as it is.
// NOTE(review): confirm Monitoring is the intended replacement for "MetricJob".
[SimpleJob(RunStrategy.Monitoring)]
public class MetricBenchmarks
{
    // Focuses on the measured values themselves
    // Minimal warm-up, as many iterations as needed
}
RuntimeMoniker — Сравнение версий .NET
[SimpleJob(RunStrategy.Throughput, baseline: true)]
[SimpleJob(RunStrategy.Throughput, runtimeMoniker: RuntimeMoniker.Net90)]
[SimpleJob(RunStrategy.Throughput, runtimeMoniker: RuntimeMoniker.Net80)]
[SimpleJob(RunStrategy.Throughput, runtimeMoniker: RuntimeMoniker.Net70)]
public class FrameworkComparisonBenchmarks
{
[Benchmark(Baseline = true)]
public string Net7_StringJoin() => string.Join(" ", _words);
[Benchmark]
public string Net8_StringJoin() => string.Join(" ", _words);
[Benchmark]
public string Net9_StringJoin() => string.Join(" ", _words);
}Platform и Architecture
[SimpleJob(platform: Platform.X64)]
[SimpleJob(platform: Platform.X86)]
[SimpleJob(platform: Platform.Arm64)]
public class PlatformComparisonBenchmarks { }
// Или через атрибуты
[SimpleJob(runtimeMoniker: RuntimeMoniker.Net90, platform: Platform.X64, arch: Arch64)]
[SimpleJob(runtimeMoniker: RuntimeMoniker.Net90, platform: Platform.Arm64, arch: Arch64)]GC Server vs Workstation
[SimpleJob(gcServer: false)] // Workstation GC
[SimpleJob(gcServer: true)] // Server GC
public class GcModeBenchmarks
{
[Benchmark]
public void MemoryAllocation()
{
var arr = new byte[1024 * 1024]; // 1MB allocation
Buffer.BlockCopy(arr, 0, arr, 0, arr.Length);
}
}Custom Job Definition
/// <summary>
/// Custom benchmark configuration. A job is configured through the fluent
/// <c>Job.Default.With…</c> API (not by inheriting from <c>Job</c>) and is registered
/// in a <c>ManualConfig</c>, which a benchmark class then references via <c>[Config]</c>.
/// </summary>
public class MyCustomConfig : ManualConfig
{
    public MyCustomConfig()
    {
        AddJob(Job.Default
            .WithId("Custom-10M-ops")
            // Run settings
            .WithStrategy(RunStrategy.Throughput)
            .WithWarmupCount(10)
            .WithIterationCount(7)
            .WithLaunchCount(1)
            .WithInvocationCount(10_000_000) // 10M invocations per iteration
            // GC settings
            .WithGcServer(true)
            .WithGcForce(true)               // force a collection between measurements
            .WithGcConcurrent(false));

        // Directory for build output, logs and reports.
        ArtifactsPath = "./artifacts";
    }
}

// Usage
[Config(typeof(MyCustomConfig))]
public class MyBenchmarks { }
Hardware Counters
Подключение
using BenchmarkDotNet.Attributes;
using BenchmarkDotNet.Engines;
/// <summary>
/// Sums one million integers sequentially and in parallel while collecting hardware counters.
/// </summary>
// Counter names are members of the BenchmarkDotNet.Diagnosers.HardwareCounter enum
// (fully qualified here so the snippet does not depend on an extra using directive).
// NOTE(review): how many counters can be sampled at once is platform-dependent — trim
// the list if the diagnoser rejects it.
[HardwareCounters(
    BenchmarkDotNet.Diagnosers.HardwareCounter.TotalCycles,
    BenchmarkDotNet.Diagnosers.HardwareCounter.InstructionRetired,
    BenchmarkDotNet.Diagnosers.HardwareCounter.CacheMisses,
    BenchmarkDotNet.Diagnosers.HardwareCounter.LlcMisses,
    BenchmarkDotNet.Diagnosers.HardwareCounter.BranchInstructions,
    BenchmarkDotNet.Diagnosers.HardwareCounter.BranchMispredictions)]
public class HardwareCountersBenchmark
{
    // Initialized to an empty array so the non-nullable field is never null before Setup().
    private int[] _data = Array.Empty<int>();

    [GlobalSetup]
    public void Setup()
    {
        _data = Enumerable.Range(0, 1_000_000).ToArray();
    }

    /// <summary>
    /// Plain index loop. The true total does not fit in an <c>int</c>; the accumulator
    /// wraps around when overflow checking is off (the compiler default).
    /// </summary>
    [Benchmark]
    public int SumSequential()
    {
        int sum = 0;
        for (int i = 0; i < _data.Length; i++)
        {
            sum += _data[i];
        }

        return sum;
    }

    /// <summary>
    /// Parallel sum in which every element is added exactly once. Each worker accumulates
    /// into its own local value and publishes it with a single <c>Interlocked.Add</c>.
    /// The previous version started a strided inner loop for every index, so elements
    /// were added many times and the result did not match <see cref="SumSequential"/>.
    /// </summary>
    [Benchmark]
    public int SumParallel()
    {
        long total = 0;
        Parallel.For(
            0,
            _data.Length,
            () => 0L,
            (i, _, acc) => acc + _data[i],
            acc => Interlocked.Add(ref total, acc));

        // Truncating cast: yields the same low 32 bits as the wrapping int accumulator above.
        return (int)total;
    }
}
Интерпретация Hardware Counters
| Counter | Описание | Что считается хорошим |
|---|---|---|
| L1CacheMisses | Пропуски L1 кэша | < 1% от L1 accesses |
| L2CacheMisses | Пропуски L2 кэша | < 5% от L2 accesses |
| L3CacheMisses | Пропуски L3 кэша (RAM access) | < 10% от L3 accesses |
| BranchMisses | Ошибки предсказания ветвлений | < 5% от branch instructions |
| InstructionsRetired | Выполненные инструкции | Меньше = лучше для same work |
| CpuClocks | Такты процессора | Меньше = лучше |
Кастомные Hardware Counters
[HardwareCounters(
new PerformanceCounterDescription("Cache Misses", 1),
new PerformanceCounterDescription("Branch Misses", 5))]
public class CustomHardwareCountersBenchmark { }Statistical Analysis
Metrics, которые предоставляет BDN
| Metric | Описание |
|---|---|
Mean | Среднее время выполнения |
Median | Медианное время |
StdDev | Стандартное отклонение |
Min | Минимальное время |
Max | Максимальное время |
P05, P25, P50, P75, P95, P99 | Перцентили |
Outliers | Выбросы (вычислены статистически) |
Operations/sec | Throughput |
Outlier Detection
// BDN использует IQR метод для обнаружения выбросов
// Q1 = 25th percentile, Q3 = 75th percentile
// Outlier = значение < Q1 - 1.5*IQR или > Q3 + 1.5*IQR
// Настройка sensitivity
// Outlier handling is configured with the class-level [Outliers] attribute.
// OutlierMode (namespace Perfolizer.Mathematics.OutlierDetection) offers
// DontRemove, RemoveUpper, RemoveLower and RemoveAll; there is no percentile setting.
[MemoryDiagnoser]
[Outliers(Perfolizer.Mathematics.OutlierDetection.OutlierMode.RemoveUpper)] // drop only unusually slow measurements
public class OutlierBenchmark
{
    // A method is measured only when it is marked with [Benchmark].
    [Benchmark]
    public void MyMethod() { }
}
Confidence Intervals
// BDN автоматически рассчитывает 95% confidence interval
// Для статистической значимости: CI не должен пересекаться между сравнениями
[RankColumn(RankOrder.Ascending)]
public class StatisticalComparisonBenchmark
{
// Если CI для MethodA не пересекается с CI для MethodB
// → разница статистически значима
}Diagnostics
Memory Diagnostics
[MemoryDiagnoser] // Показывает GC stats и allocations
public class MemoryBenchmark
{
[Benchmark]
public void Allocations()
{
var list = new List<int>(1000);
for (int i = 0; i < 1000; i++)
list.Add(i);
}
}
// Вывод:
// | Method | Mean | Error | StdDev | Gen0 | Gen1 | Allocated |
// |-------------|----------|----------|----------|--------|------|-----------|
// | Allocations | 1.234 μs | 0.012 μs | 0.011 μs | 0.0610 | - | 4.2 KB |Exporters — Форматы отчётов
[MarkdownExporter] // GitHub-friendly markdown
[HtmlExporter] // Interactive HTML report
[CsvExporter] // CSV for spreadsheet analysis
[RPlotExporter] // R plots for statistical analysis
[GithubExporter] // GitHub-flavored markdown
public class ExporterBenchmark
{
// Reports generated in ./results/ folder
}
// Custom export directory
[MarkdownExporter, HtmlExporter, CsvExporter]
[ArtifactsDirectory(@"./benchmarks/results")]
public class CustomExportBenchmark { }Event Pipe — Real-time diagnostics
// The attribute takes a value of the EventPipeProfile enum (BenchmarkDotNet.Diagnosers).
[EventPipeProfiler(BenchmarkDotNet.Diagnosers.EventPipeProfile.CpuSampling)]
public class ProfiledBenchmark
{
    // Writes a trace file for each benchmark into the artifacts folder.
    // NOTE(review): the expected output is a .nettrace file (plus a speedscope .json);
    // confirm the extension and viewer for the BenchmarkDotNet version in use.
}
Advanced Patterns
Category-based Organization
// Разделение бенчмарков по категориям
// Categories are assigned with [BenchmarkCategory].
[BenchmarkCategory("String", "Allocation")]
public class StringAllocatingBenchmark
{
    // string.Split returns string[], so the benchmark must return an array as well.
    [Benchmark]
    public string[] Split() => "a b c".Split(' ');
}
// Categories are assigned with [BenchmarkCategory].
[BenchmarkCategory("String", "ZeroAlloc")]
public class StringZeroAllocBenchmark
{
    // Searches a string literal for the first space; returns its index.
    [Benchmark]
    public int IndexOf() => "hello world".IndexOf(' ');
}
// Запуск только определённой категории
// dotnet run -c Release --filter *String*
// dotnet run -c Release --filter *ZeroAlloc*Params — Parameterized Benchmarks
/// <summary>
/// Sums an array whose length is supplied by <c>[Params]</c>; every benchmark runs once
/// per listed size.
/// </summary>
public class ParametrizedBenchmark
{
    [Params(100, 1000, 10000, 100000)]
    public int CollectionSize;

    // Initialized to an empty array so the non-nullable field is never null before Setup().
    private int[] _data = Array.Empty<int>();

    [GlobalSetup]
    public void Setup()
    {
        _data = Enumerable.Range(0, CollectionSize).ToArray();
    }

    /// <summary>
    /// LINQ sum, accumulated as <c>long</c>. <c>Enumerable.Sum</c> over <c>int</c> is
    /// overflow-checked, and 0 + 1 + … + 99 999 exceeds <c>int.MaxValue</c>, so the
    /// <c>int</c> overload would throw <c>OverflowException</c> for the largest size.
    /// </summary>
    [Benchmark]
    public long Sum() => _data.Sum(static x => (long)x);

    /// <summary>
    /// Parallel sum in which every element is added exactly once: each worker keeps a
    /// local accumulator and publishes it with a single <c>Interlocked.Add</c>.
    /// The previous version ran a strided inner loop per index and over-counted elements.
    /// </summary>
    [Benchmark]
    public long ParallelSum()
    {
        long total = 0;
        Parallel.For(
            0,
            _data.Length,
            () => 0L,
            (i, _, acc) => acc + _data[i],
            acc => Interlocked.Add(ref total, acc));
        return total;
    }
}
AllParameters — Multi-parameter Benchmarks
/// <summary>
/// Several parameter members on one class: BenchmarkDotNet runs each benchmark for every
/// combination of their values. Parameter members need a public setter.
/// </summary>
public class MultiParamBenchmark
{
    [Params(100)]
    public int Size { get; set; }

    // [ParamsAllValues] enumerates every value of a bool (or enum) member.
    [ParamsAllValues]
    public bool UseArray { get; set; }

    [ParamsAllValues]
    public bool UseSpan { get; set; }

    [Benchmark]
    public void Process()
    {
        // Executed for all 1 × 2 × 2 = 4 combinations of (Size, UseArray, UseSpan):
        // (100,false,false), (100,false,true), (100,true,false), (100,true,true)
    }
}
Benchmark Categories with Filter
# Run only the benchmarks that belong to the String category
dotnet run -c Release -- --anyCategories String
# Skip the Allocation benchmarks by selecting the complementary category
# NOTE(review): no category-exclusion switch could be confirmed; verify against the console-arguments docs
dotnet run -c Release -- --anyCategories ZeroAlloc
# Glob filter on the fully qualified benchmark name (namespace.type.method)
dotnet run -c Release -- --filter "MyCompany.Benchmarks*"
Baseline и Relative Metrics
Baseline Benchmark
[SimpleJob(RunStrategy.Throughput, baseline: true)] // Baseline = 100%
[SimpleJob(RunStrategy.Throughput)]
public class BaselineComparisonBenchmark
{
[Benchmark(Baseline = true)]
public string OriginalImplementation()
{
var sb = new StringBuilder();
for (int i = 0; i < 1000; i++)
sb.Append(i);
return sb.ToString();
}
[Benchmark]
public string OptimizedImplementation()
{
return string.Join("", Enumerable.Range(0, 1000));
}
}
// Вывод:
// | Method | Mean | Ratio | RatioSD |
// |------------------------|-----------|-------|---------|
// | OriginalImplementation | 12.345 μs | 1.00 | 0.00 |
// | OptimizedImplementation| 8.123 μs | 0.66 | 0.01 |
// ← Optimized в 1.52x быстрее (Ratio < 1 = лучше)Relative Allocations
[MemoryDiagnoser]
[BaselineColumn]
[RelativeColumn]
public class AllocationComparisonBenchmark
{
[Benchmark(Baseline = true)]
public string Allocating() => "test".ToUpper();
[Benchmark]
public string ZeroAlloc() => "test"; // Already upper
}CI Integration — Performance Regression Detection
Кастомный reporter для CI
using BenchmarkDotNet.Reports;
using BenchmarkDotNet.Running;
/// <summary>
/// Markdown exporter that also reports a regression when the current mean is more than
/// 10% above the baseline mean, and makes the process finish with a non-zero exit code.
/// </summary>
// NOTE(review): ToSummary, Results.Data and IsBaseline are kept exactly as the snippet
// used them. They do not look like the BenchmarkDotNet exporter API (ExportToLog,
// Summary.Reports, BenchmarkReport.ResultStatistics) — confirm against the package version.
public class CiRegressionReporter : MarkdownExporter
{
    // current > baseline * 1.1 means a regression of more than 10%.
    private const double RegressionFactor = 1.1;

    public override string ToSummary(BenchmarkCase benchmarkCase, Summary summary)
    {
        var markdown = base.ToSummary(benchmarkCase, summary);

        // Check for regression
        foreach (var benchmark in summary.Benchmarks)
        {
            var stats = benchmark.Results.Data;

            // A comparison needs both a baseline row and a non-baseline row; without this
            // guard First() throws InvalidOperationException when one of them is missing.
            if (!stats.Any(s => s.IsBaseline) || !stats.Any(s => !s.IsBaseline))
            {
                continue;
            }

            var baselineMean = stats.First(s => s.IsBaseline).Mean;
            var currentMean = stats.First(s => !s.IsBaseline).Mean;

            // A zero or negative baseline cannot be turned into a percentage.
            if (baselineMean <= 0)
            {
                continue;
            }

            if (currentMean > baselineMean * RegressionFactor)
            {
                var regression = ((currentMean - baselineMean) / baselineMean) * 100;
                // NOTE(review): the "μs" label assumes Mean is in microseconds — confirm the unit.
                Console.WriteLine($"PERFORMANCE REGRESSION DETECTED: {regression:F1}%");
                Console.WriteLine($"Baseline: {baselineMean:F3} μs → Current: {currentMean:F3} μs");

                // Record the failure for CI without terminating here, so the remaining
                // benchmarks are checked and the report is still returned and written.
                Environment.ExitCode = 1;
            }
        }

        return markdown;
    }
}
GitHub Actions CI Pipeline
# .github/workflows/performance.yml
name: Performance Regression Test
on:
  push:
    branches: [ main ]
  pull_request:
    branches: [ main ]
jobs:
  benchmark:
    runs-on: windows-latest
    steps:
      - uses: actions/checkout@v4
      - name: Setup .NET
        uses: actions/setup-dotnet@v4
        with:
          dotnet-version: '9.0.x'
      - name: Install dependencies
        run: dotnet restore
      # Every step starts in the workspace root, so a `cd` in one step does not carry
      # over to the next one. The benchmarks therefore run once, in a single step that
      # sets its own working directory and checks the exit code.
      - name: Run benchmarks and check for regression
        working-directory: benchmarks
        shell: pwsh
        run: |
          dotnet run -c Release -- --anyCategories Critical --exporters csv
          if ($LASTEXITCODE -ne 0) {
            Write-Error "Performance regression detected!"
            exit 1
          }
      # Keep the report even when the previous step failed.
      - name: Upload benchmark report
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: benchmark-results
          path: benchmarks/BenchmarkDotNet.Artifacts/results
xUnit Benchmark Integration
using BenchmarkDotNet.Attributes;
using BenchmarkDotNet.Running;
using Xunit;
using Xunit.Abstractions;
/// <summary>
/// Runs the critical-path benchmark from an xUnit test and fails when the mean time
/// exceeds the performance budget.
/// </summary>
public class XUnitBenchmarkTests
{
    // Performance budget: < 100 μs
    private const double BudgetMicroseconds = 100;

    private readonly ITestOutputHelper _output;

    public XUnitBenchmarkTests(ITestOutputHelper output)
    {
        _output = output;
    }

    [Fact]
    public void CriticalPath_ShouldNotExceedBudget()
    {
        var summary = BenchmarkRunner.Run<CriticalPathBenchmarks>();

        // Summary exposes one report per executed benchmark case; its statistics are
        // null when the benchmark failed to build or run.
        var stats = summary.Reports.First().ResultStatistics;
        Assert.NotNull(stats);

        // Statistics are reported in nanoseconds; convert before comparing with the budget.
        var mean = stats!.Mean / 1000.0;
        _output.WriteLine($"Mean: {mean:F3} μs");

        Assert.True(mean < BudgetMicroseconds, $"Performance budget exceeded: {mean:F3} μs");
    }
}
/// <summary>Benchmark of the serialization step that the budget test measures.</summary>
[MemoryDiagnoser]
public class CriticalPathBenchmarks
{
    // The serialized string is returned so the result is consumed by the harness;
    // a discarded result risks the measured work being optimized away.
    [Benchmark]
    public string Serialize()
    {
        var data = new { Id = 1, Name = "Test", Value = 42.0 };
        return JsonSerializer.Serialize(data);
    }
}
.NET 8 vs .NET 9 Performance Comparison
Пример сравнения
using BenchmarkDotNet.Attributes;
using BenchmarkDotNet.Jobs;
using BenchmarkDotNet.Engines;
/// <summary>
/// Runs the same set of benchmarks on .NET 8 (baseline job) and .NET 9.
/// </summary>
// SimpleJob takes the run strategy first and the runtime moniker second.
[SimpleJob(RunStrategy.Throughput, RuntimeMoniker.Net80, baseline: true)]
[SimpleJob(RunStrategy.Throughput, RuntimeMoniker.Net90)]
[MemoryDiagnoser]
// Counter names are members of the BenchmarkDotNet.Diagnosers.HardwareCounter enum.
[HardwareCounters(
    BenchmarkDotNet.Diagnosers.HardwareCounter.CacheMisses,
    BenchmarkDotNet.Diagnosers.HardwareCounter.LlcMisses,
    BenchmarkDotNet.Diagnosers.HardwareCounter.BranchMispredictions)]
public class DotnetVersionComparisonBenchmark
{
    private readonly byte[] _data = new byte[1024 * 1024]; // 1MB

    // NOTE(review): not read by any benchmark below — presumably meant for a
    // deserialization benchmark; use it or remove it.
    private readonly string _json = """{"items":[{"id":1,"name":"test","value":42.0}]}""";

    [GlobalSetup]
    public void Setup()
    {
        // Fixed seed keeps the buffer contents identical between runs.
        var random = new Random(42);
        random.NextBytes(_data);
    }

    [Benchmark]
    public void MemoryCopy() => Buffer.BlockCopy(_data, 0, _data, 0, _data.Length);

    [Benchmark]
    public void SpanCopy()
    {
        Span<byte> span = _data;
        span.CopyTo(span);
    }

    // Inside this class the simple name "JsonSerializer" binds to this method, so the
    // type has to be referenced by its fully qualified name.
    [Benchmark]
    public string JsonSerializer() => System.Text.Json.JsonSerializer.Serialize(new { id = 1, name = "test" });

    // NOTE(review): same body as JsonSerializer() — both rows will report the same numbers.
    [Benchmark]
    public string SystemTextJson() => System.Text.Json.JsonSerializer.Serialize(new { id = 1, name = "test" });

    [Benchmark]
    public string StringOperations()
    {
        var s = "hello world 2024";
        return s.ToUpper().Replace("2024", "2025");
    }

    [Benchmark]
    public int StringIndexOf() => "hello world".IndexOf('w');

    // Enumerable.Sum over int is overflow-checked; 0 + … + 99 999 exceeds int.MaxValue
    // and would throw, so the range is limited to values whose total fits in an int.
    // The result is returned so that it is consumed by the harness.
    [Benchmark]
    public int LinqSum() => Enumerable.Range(0, 50_000).Sum();
}
Ожидаемые результаты .NET 9 improvements
| Benchmark | .NET 8 | .NET 9 | Improvement |
|---|---|---|---|
| MemoryCopy | ~500ns | ~350ns | ~30% faster |
| SpanCopy | ~500ns | ~350ns | ~30% faster |
| JsonSerializer | ~12μs | ~10μs | ~17% faster |
| StringOperations | ~800ns | ~650ns | ~19% faster |
| LinqSum | ~120μs | ~100μs | ~17% faster |
Практика
Задание 1: Benchmark Suite для String Manipulation
Создайте benchmark suite с hardware counters для:
- string.Split() vs MemoryExtensions.Split() (Span-based)
- StringBuilder.Append() vs string.Create()
- string.Concat() vs string.Join()
- Regex matching vs Contains()/StartsWith()
Требования:
- Hardware counters: L1/L2/L3 cache misses, branch misses
- MemoryDiagnoser для всех бенчмарков
- Params: размер входных данных 10, 100, 1000, 10000
- Markdown + HTML export
Задание 2: .NET 8 vs .NET 9 Comparison
Сравнить производительность для workload:
- Serialization/Deserialization
- LINQ operations
- String operations
- Dictionary lookups
Требования:
- Job configuration для .NET 8 и .NET 9
- Baseline comparison с Ratio column
- Hardware counters для cache analysis
- Statistical significance verification
Задание 3: CI Regression Test
Настроить pipeline, который:
- Запускает критические бенчмарки при каждом PR
- Сравнивает результаты с baseline (main branch)
- Fail при >10% регрессии
- Publishes benchmark report как artifact
Контрольные вопросы
- Зачем нужен JIT warming и как BDN с ним работает?
- В чём разница между RunStrategy.ColdStart и RunStrategy.Throughput?
- Как интерпретировать Hardware Counters?
- Как обнаружить performance regression в CI?
- Зачем нужен baseline в BDN?
Diagnostic Tools
Введение в .NET Diagnostic Tools
.NET Diagnostic Tools — набор командной строки для профилирования и диагностики .NET приложений в реальном времени.
Инструменты
| Tool | Purpose | Install |
|---|---|---|
dotnet-trace | CPU sampling, event tracing (ETW) | dotnet tool install -g dotnet-trace |
dotnet-counters | Real-time metrics (CPU, GC, threads, allocations) | dotnet tool install -g dotnet-counters |
dotnet-gcdump | Heap analysis, object graph inspection | dotnet tool install -g dotnet-gcdump |
dotnet-dump | Thread analysis, stack traces, memory dumps | dotnet tool install -g dotnet-dump |
Установка
# Install all tools
dotnet tool install -g dotnet-trace
dotnet tool install -g dotnet-counters
dotnet tool install -g dotnet-gcdump
dotnet tool install -g dotnet-dump
# Verify installation
dotnet trace --help
dotnet counters --help
dotnet gcdump --help
dotnet dump --helpdotnet-trace — CPU Profiling
CPU Sampling
# Start tracing (CPU sampling) and write the trace to a known file
dotnet-trace collect -n MyApplication --profile cpu-sampling -o myapp.nettrace
# Print the methods with the most samples from the collected trace file
dotnet-trace report myapp.nettrace topN -n 20
# Convert the trace for a flame-graph viewer (e.g. https://www.speedscope.app)
dotnet-trace convert myapp.nettrace --format Speedscope
Collecting with Process ID
# Find process
dotnet trace list
# Collect from running process
dotnet trace collect -p 12345 --profile cpu-sampling --duration 00:02:00
# Collect with custom events
dotnet trace collect -p 12345 \
--providers Microsoft-DotNETCore-SampleEvents \
--format nettraceETW Event Tracing
# Collect ETW events for detailed analysis
dotnet trace collect -p 12345 \
--providers \
Microsoft-Windows-Kernel-Process \
Microsoft-Windows-Kernel-Thread \
Microsoft-Windows-Kernel-Network \
Microsoft-Windows-Kernel-File \
--output trace.etl
# .NET-specific ETW providers
dotnet trace collect -p 12345 \
--providers \
Microsoft-Windows-DotNETRuntime \
Microsoft-Windows-DotNETRuntimeRundown \
--output dotnet-trace.nettraceFlame Graph Generation
# Generate flame graph from trace
dotnet trace report --process <pid> --format flamegraph
# Save to file
dotnet trace report --process <pid> --format flamegraph --output flamegraph.html
# Filter by namespace
dotnet trace report --process <pid> --filter "MyApp.Services.*"Custom Providers
# Define custom event source
dotnet trace collect -p 12345 \
--providers \
"MyCompany.MyApp:0:0:8:MyEvent" \
"Microsoft-DotNETCore-SampleEvents:0:5:0:myevent" \
--duration 00:05:00
# Common .NET Runtime providers
dotnet trace collect -p 12345 \
--providers \
"Microsoft-Windows-DotNETRuntime:0:0:0:Default" \
"Microsoft-Windows-DotNETRuntimeRundown:0:0:0:Default"dotnet-counters — Real-time Metrics
Monitoring Lifecycle
# List available counters
dotnet counters list
# Monitor a running process
dotnet counters monitor -p 12345
# Monitor with custom refresh interval
dotnet counters monitor -p 12345 --refresh-interval 1000
# Monitor specific counters
dotnet counters monitor -p 12345 \
--counters Microsoft.NETCore.Runtime \
System.Runtime \
System.Runtime.JobsKey Counter Sets
# .NET Runtime counters
dotnet counters monitor -p 12345 \
--counters Microsoft.NETCore.Runtime
# Available counters:
# - dotnet_gc_count_gen0_collections
# - dotnet_gc_count_gen1_collections
# - dotnet_gc_count_gen2_collections
# - dotnet_gc_heap_size_bytes
# - dotnet_gc_promoted_bytes
# - dotnet_gc_finalization_pending_count
# - dotnet_gc_committed_bytes
# - dotnet_gc_fragmentation_count
# System.Runtime counters
dotnet counters monitor -p 12345 \
--counters System.Runtime
# Available counters:
# - dotnet_cpu_time_pct
# - dotnet_exception_count
# - dotnet_gen_0_heap_size_bytes
# - dotnet_gen_1_heap_size_bytes
# - dotnet_gen_2_heap_size_bytes
# - dotnet_loh_size_bytes
# - dotnet_poh_size_bytes
# - dotnet_monitor_lock_contention_count
# - dotnet_number_of_timers
# - dotnet_number_of_workers_threads
# - dotnet_number_of_completion_port_threads
# - dotnet_assembly_count
# - dotnet_assembly_bytes
# - dotnet_exception_size_bytes
# - dotnet_interop_marshalling_bytes
# ASP.NET Core counters
dotnet counters monitor -p 12345 \
--counters Microsoft.AspNetCore.Server.Kestrel
# Available counters:
# - aspnetcore_requests_queue_count
# - aspnetcore_requests_per_sec
# - aspnetcore_active_requests
# - aspnetcore_connection_count
# - aspnetcore_current_tls_handshake_count
# - aspnetcore_request_queue_time_avg_msContinuous Monitoring with Output
# Continuous output to file
dotnet counters monitor -p 12345 \
--counters System.Runtime \
--output metrics.csv \
--format csv \
--refresh-interval 1000 \
--count 60 # 60 samples at 1Hz
# JSON output
dotnet counters monitor -p 12345 \
--counters Microsoft.NETCore.Runtime \
--output metrics.json \
--format json
# Custom counter set
dotnet counters monitor -p 12345 \
--counters \
"System.Runtime:dotnet_cpu_time_pct" \
"System.Runtime:dotnet_gen_2_heap_size_bytes" \
"System.Runtime:dotnet_gc_fragmentation_count"Load Testing with Counters
# Monitor during load test
dotnet counters monitor -p 12345 \
--counters \
"System.Runtime:dotnet_cpu_time_pct" \
"System.Runtime:dotnet_gen_0_heap_size_bytes" \
"System.Runtime:dotnet_gen_1_heap_size_bytes" \
"System.Runtime:dotnet_gen_2_heap_size_bytes" \
"System.Runtime:dotnet_loh_size_bytes" \
"System.Runtime:dotnet_gc_committed_bytes" \
"Microsoft.AspNetCore.Server.Kestrel:aspnetcore_requests_per_sec" \
"Microsoft.AspNetCore.Server.Kestrel:aspnetcore_active_requests" \
--refresh-interval 500 \
--output load-test-metrics.csv \
--format csvdotnet-gcdump — Heap Analysis
Generating GC Dump
# Generate from running process
dotnet gcdump collect -p 12345
# Output: gcdump_*.gcdump
# Generate with custom name
dotnet gcdump collect -p 12345 -o my-app-gcdump.gcdump
# Generate from multiple processes
dotnet gcdump collect -n MyApplication
# Generate from core dump
dotnet gcdump collect -d my-application.coredumpAnalyzing GC Dump
# View summary
dotnet gcdump report my-app-gcdump.gcdump
# Detailed analysis
dotnet gcdump report my-app-gcdump.gcdump --format detailed
# Object type breakdown
dotnet gcdump report my-app-gcdump.gcdump --format objects
# Generation breakdown
dotnet gcdump report my-app-gcdump.gcdump --format generations
# Root analysis
dotnet gcdump report my-app-gcdump.gcdump --format rootsObject Graph Analysis
# Find top object types by count
dotnet gcdump report my-app-gcdump.gcdump --top-by-count 50
# Find top object types by size
dotnet gcdump report my-app-gcdump.gcdump --top-by-size 50
# Find specific type
dotnet gcdump report my-app-gcdump.gcdump --type "MyApp.Models.User"
# Find retention paths
dotnet gcdump report my-app-gcdump.gcdump --roots --type "MyApp.Cache.SessionCache"GC Heap Analysis
# Generation distribution
dotnet gcdump report my-app-gcdump.gcdump --generations
# Typical output:
# Generation 0: 1,234 objects, 45.6 KB
# Generation 1: 567 objects, 1.2 MB
# Generation 2: 89 objects, 15.3 MB
# GC heap summary
dotnet gcdump report my-app-gcdump.gcdump --summary
# Large Object Heap analysis
dotnet gcdump report my-app-gcdump.gcdump --type "System.Byte[]" --top-by-size 20Programmatic GC Dump Analysis
using System.Diagnostics.GCDump;
// Parse gcdump file
using var gcdump = new GCDumpReader("my-app-gcdump.gcdump");
// Get generation info
foreach (var gen in gcdump.Generations)
{
Console.WriteLine($"Gen {gen.Generation}: {gen.ObjectCount} objects, {gen.TotalSize} bytes");
}
// Find objects by type
var userObjects = gcdump.GetObjects("MyApp.Models.User");
Console.WriteLine($"Found {userObjects.Count} User objects");
// Find retention paths
foreach (var root in gcdump.GetRoots())
{
Console.WriteLine($"Root: {root.Type} -> {root.ObjectCount} objects");
foreach (var obj in root.GetObjectPath())
{
Console.WriteLine($" -> {obj.Type}");
}
}dotnet-dump — Process Dump Analysis
Generating Dumps
# Interactive dump (from running process)
dotnet dump collect -p 12345
# Full memory dump
dotnet dump collect -p 12345 --type full
# Mini dump (faster, smaller)
dotnet dump collect -p 12345 --type mini
# Dump on exception
dotnet-dump collect --process-id <pid> --dump-type mini --continue
# Dump from core file
dotnet dump collect -d my-application.coredumpDump Analysis
# Start interactive dump analysis
dotnet dump analyze
# In the analyze shell:
> clrstack # Show managed call stack
> !dumpheap # List objects on heap
> !dumpobj <addr> # Inspect object at address
> !dumpdomain # Show application domains
> !threads # Show all threads
> !syncblk # Show synchronization blocks
> !peb # Show Process Environment BlockCommon Dump Analysis Commands
# Find all objects of a type
> !dumpheap -type MyApp.Models.User
# Output:
# MT Field Type
# 00007fff8a123456 0000000000 MyApp.Models.User
# ...
# Find largest objects
> !dumpheap -stat -type System.Byte[]
# Output:
# MT Count TotalSize Class Name
# 00007fff8a123456 1234 123456789 System.Byte[]
# ...
# Inspect specific object
> !dumpobj 000001a2b3c4d5e6
# Output:
# Name: MyApp.Models.User
# MethodTable: 00007fff8a123456
# EEClass: 00007fff89abcdef
# Size: 128(0x80) bytes
# Fields:
# MT Field Offset Type VT Attr Value Name
# 00007fff8a123789 00000000 00000008 System.Int32 1 instance 42 Id
# 00007fff8a123abc 00000008 00000010 System.String 0 instance 000001a2b3c4d600 NameThread Analysis
# Show all threads
> !threads
# Output:
# ThreadCount: 15
# UnstartedThread: 0
# BackgroundThread: 12
# PendingThread: 0
# DeadThread: 1
# Hosted Runtime: yes
# Lock
# ID OSID ThreadOBJ State GC Mode GC Alloc Context Domain Count Apt Exception
# 10 1234 000001a2b3c0 2002020 Preemptive 0000000000000000 000001a2b400 1 MTA (Thread Pool Worker)
# 11 5678 000001a2b3c8 10202 Preemptive 0000000000000000 000001a2b400 2 MTA (Thread Pool Completion Port)
# Show managed stack for specific thread
> ~10clrstack
# Output:
# OS Thread Id: 1234 (10)
# Child SP IP Call Site
# 000000AABBCC0000 00007fff8a123456 MyApp.Services.UserService.GetUser(System.Guid)
# 000000AABBCC0100 00007fff8a123789 MyApp.Controllers.UserController.Get(System.Guid)
# ...
# Find blocked threads
> !dumplock
# Output:
# Scaning free locks...
# LockOwner OS Thread Id Object Address Type Recursion
# 000001a2b3c0 1234 (10) 000001a2b400 System.Threading.Mutex 1
# 000001a2b3c8 5678 (11) 000001a2b408 System.Threading.Mutex 0Memory Leak Detection
# Find leaked objects by type
> !dumpheap -stat -type MyApp.Cache.SessionCache
# Check if count grows over time
# Find objects with specific field value
> !dumpheap -mt <MethodTable> -short | ForEach { $_.ToString() } | Where-Object { $_.Contains("userId=12345") }
# Analyze finalizer queue
> !finalizequeue
# Output:
# Free MT Size
# 000001a2b500 00007fff8a123456 128 (0x80) MyApp.Models.User
# 000001a2b580 00007fff8a123789 64 (0x40) MyApp.Models.Order
# ...PerfView — Advanced ETW Analysis
Installation
# Download from GitHub
# https://github.com/microsoft/perfview/releases
# Or via winget
winget install Microsoft.PerfViewCollecting Data
# CPU profiling
PerfView.exe /AcceptEula /KernelProviders=CPU /MaxCollectSec=120 Collect
# GC profiling
PerfView.exe /AcceptEula /GC /MaxCollectSec=120 Collect
# Network profiling
PerfView.exe /AcceptEula /Network /MaxCollectSec=120 Collect
# Combined profiling
PerfView.exe /AcceptEula /CPU:250 /GC /Memory:100 /MaxCollectSec=300 Collect
# Custom provider
PerfView.exe /AcceptEula /Providers=Microsoft-Windows-DotNETRuntime /MaxCollectSec=300 CollectAnalysis Features
# Open collected data
PerfView.exe MyData.zip
# Features:
# 1. CPU Sampling — Flame graphs, hot paths
# 2. GC Events — Pause times, collection counts, heap sizes
# 3. Memory — Allocation rates, GC heap analysis
# 4. Thread — Thread pool usage, contention
# 5. ETW Events — Custom event correlationGC Event Analysis
# PerfView GC Analysis shows:
# - GC pause time per collection
# - Gen 0 / Gen 1 / Gen 2 collection frequency
# - Heap size over time
# - Promotion rate
# - Finalization rate
# - LOH fragmentation
# - GC mode (Workstation vs Server)Практика
Задание 1: CPU Bottleneck через Flame Graph
Сценарий: Приложение с CPU bottleneck в service layer.
Шаги:
- Запустить приложение с нагрузкой
- dotnet trace collect -p <pid> --profile cpu-sampling --duration 00:01:00
- dotnet trace report --format flamegraph --output flamegraph.html
- Найти hot path — функция с наибольшим % CPU
- Оптимизировать и измерить improvement
Задание 2: GC Profiling под нагрузкой
Сценарий: API сервер с высокой частотой GC collections.
Шаги:
- dotnet counters monitor -p <pid> --counters System.Runtime --refresh-interval 500
- Запустить load test (100 RPS)
- Наблюдать за:
  - dotnet_gen_0_heap_size_bytes — рост Gen 0
  - dotnet_gc_count_gen0_collections — частота Gen 0
  - dotnet_loh_size_bytes — LOH usage
- Найти correlation между load и GC activity
- Оптимизировать allocations
Задание 3: Memory Leak через Heap Dump
Сценарий: Приложение медленно растёт в памяти.
Шаги:
- dotnet gcdump collect -p <pid> -o before.gcdump
- Подождать 30 минут под нагрузкой
- dotnet gcdump collect -p <pid> -o after.gcdump
- Сравнить: dotnet gcdump report after.gcdump --top-by-count 50
- Найти типы, которые выросли
- Найти retention paths: dotnet gcdump report after.gcdump --roots --type <leaked-type>
- Устранить leak
Контрольные вопросы
- В чём разница между dotnet-trace и dotnet-counters?
- Как интерпретировать flame graph?
- Когда использовать dotnet-gcdump vs dotnet-dump?
- Как найти memory leak через diagnostic tools?
- Что показывает PerfView, чего не показывают другие инструменты?
ASP.NET Core Performance
Kestrel Tuning
Basic Kestrel Configuration
// Program.cs
// Server-wide Kestrel limits. KestrelServerLimits has no TLS-handshake rate setting, and
// the minimum request rate is exposed as MinRequestBodyDataRate.
builder.WebHost.ConfigureKestrel(options =>
{
    // Connection limits
    options.Limits.MaxConcurrentConnections = 100_000;
    options.Limits.MaxConcurrentUpgradedConnections = 100_000;

    // Keep-alive
    options.Limits.KeepAliveTimeout = TimeSpan.FromMinutes(2);
    options.Limits.RequestHeadersTimeout = TimeSpan.FromSeconds(30);

    // Request limits
    options.Limits.MaxRequestBodySize = 10 * 1024 * 1024; // 10MB

    // Connections slower than 240 bytes/s after a 10 s grace period are dropped.
    options.Limits.MinResponseDataRate =
        new MinDataRate(bytesPerSecond: 240, gracePeriod: TimeSpan.FromSeconds(10));
    options.Limits.MinRequestBodyDataRate =
        new MinDataRate(bytesPerSecond: 240, gracePeriod: TimeSpan.FromSeconds(10));
});
High-Throughput Configuration (10k RPS)
builder.WebHost.ConfigureKestrel(options =>
{
// Listen on multiple endpoints
options.ListenAnyIP(5000, listenOptions =>
{
listenOptions.Protocols = HttpProtocols.Http2;
listenOptions.UseHttps();
});
options.ListenAnyIP(5001, listenOptions =>
{
listenOptions.Protocols = HttpProtocols.Http1;
});
// HTTP/2 specific settings
options.Http2.MaxStreamsPerConnection = 100;
options.Http2.KeepAliveTimeout = TimeSpan.FromMinutes(2);
options.Http2.KeepAlivePingDelay = TimeSpan.FromSeconds(30);
options.Http2.KeepAlivePingTimeout = TimeSpan.FromSeconds(20);
options.Http2.MaxFrameSize = 16_384;
// HTTP/1.1 settings
options.Http1.ResponseDrainTimeout = TimeSpan.FromSeconds(30);
// Thread pool tuning
options.ThreadCount = Environment.ProcessorCount * 4;
});
// Thread pool configuration: raise the minimum worker and I/O completion thread counts
// so that request bursts do not have to wait for the pool's gradual thread injection.
// Existing minimums are never lowered.
System.Threading.ThreadPool.GetMinThreads(out var currentMinWorker, out var currentMinIo);
var desiredMinThreads = Environment.ProcessorCount * 4;
System.Threading.ThreadPool.SetMinThreads(
    Math.Max(currentMinWorker, desiredMinThreads),
    Math.Max(currentMinIo, desiredMinThreads));
Kestrel Performance Settings
// appsettings.Production.json
// HTTP/2 limits belong under Kestrel:Limits:Http2. Thread-pool minimums are not read from
// appsettings; set them in code or via runtimeconfig (System.Threading.ThreadPool.MinThreads).
{
  "Kestrel": {
    "Endpoints": {
      "Http": {
        "Url": "http://0.0.0.0:5000",
        "Protocols": "Http1AndHttp2"
      },
      "Https": {
        "Url": "https://0.0.0.0:5001",
        "Protocols": "Http2",
        "Certificate": {
          "Path": "certs/server.pfx",
          "Password": "<cert-password>"
        }
      }
    },
    "Limits": {
      "MaxConcurrentConnections": 100000,
      "MaxConcurrentUpgradedConnections": 100000,
      "KeepAliveTimeout": "00:02:00",
      "RequestHeadersTimeout": "00:00:30",
      "MaxRequestBodySize": 10485760,
      "MinResponseDataRate": {
        "BytesPerSecond": 240,
        "GracePeriod": "00:00:10"
      },
      "Http2": {
        "MaxStreamsPerConnection": 100,
        "KeepAlivePingDelay": "00:00:30",
        "KeepAlivePingTimeout": "00:00:20"
      }
    }
  }
}
Response Caching
In-Memory Response Cache
// Program.cs
// Header-driven response caching: the ResponseCaching middleware stores responses that
// carry a public Cache-Control header. (The .CacheOutput(...) endpoint API belongs to the
// separate OutputCache middleware and needs AddOutputCache()/UseOutputCache().)
builder.Services.AddResponseCaching();
var app = builder.Build();

// Middleware order matters: the cache must run before anything that produces the response.
app.UseResponseCaching();
app.Use(async (context, next) =>
{
    // Only safe, idempotent requests get public cache headers; POST/PUT/DELETE stay uncached.
    if (HttpMethods.IsGet(context.Request.Method) || HttpMethods.IsHead(context.Request.Method))
    {
        context.Response.GetTypedHeaders().CacheControl = new Microsoft.Net.Http.Headers.CacheControlHeaderValue
        {
            Public = true,
            MaxAge = TimeSpan.FromMinutes(5)
        };
    }
    await next(context);
});

// Cacheable endpoint: served from the cache for up to 5 minutes.
app.MapGet("/api/products", async (ApplicationDb db) =>
{
    var products = await db.Products.ToListAsync();
    return Results.Ok(products);
});

// Non-cacheable: no Cache-Control header is emitted for POST.
app.MapPost("/api/orders", async (CreateOrderRequest request, ApplicationDb db) =>
{
    // ...
});
Response Caching with Vary Headers
// Output caching that varies on content-negotiation headers and the "fields" query value.
// Requires builder.Services.AddOutputCache() and app.UseOutputCache() in Program.cs.
// The output cache uses absolute expiration only; there is no sliding-expiration option.
app.MapGet("/api/users/{id}", async (int id, ApplicationDb db) =>
{
    var user = await db.Users.FindAsync(id);
    if (user == null) return Results.NotFound();
    return Results.Ok(user);
}).CacheOutput(policy => policy
    .Expire(TimeSpan.FromMinutes(10))
    .SetVaryByHeader("Accept", "Accept-Language")
    .SetVaryByQuery("fields"));
Cache Invalidation
public interface ICacheInvalidator
{
Task InvalidateAsync(string pattern);
}
/// <summary>
/// <see cref="ICacheInvalidator"/> implementation that removes the distributed-cache keys
/// returned for a pattern and then clears the in-memory cache.
/// </summary>
public class CacheInvalidator : ICacheInvalidator
{
private readonly IDistributedCache _cache;
private readonly IMemoryCache _memoryCache;
/// <summary>Stores the two cache instances used by <see cref="InvalidateAsync"/>.</summary>
public CacheInvalidator(IDistributedCache cache, IMemoryCache memoryCache)
{
_cache = cache;
_memoryCache = memoryCache;
}
/// <summary>
/// Removes every distributed-cache key returned by <c>GetKeysAsync(pattern)</c>, one at a
/// time, then clears the whole in-memory cache regardless of the pattern.
/// </summary>
/// <param name="pattern">Key pattern passed through to <c>GetKeysAsync</c>.</param>
public async Task InvalidateAsync(string pattern)
{
// Invalidate by pattern
// NOTE(review): the built-in IDistributedCache interface has no key-enumeration member;
// GetKeysAsync is presumably a project-specific extension — confirm it exists.
var keys = await _cache.GetKeysAsync(pattern);
foreach (var key in keys)
await _cache.RemoveAsync(key);
// Clear memory cache
// NOTE(review): IMemoryCache itself does not declare Clear(); presumably an extension or
// the concrete MemoryCache type is intended — confirm.
_memoryCache.Clear();
}
}
// Usage in CRUD operations
app.MapPost("/api/products", async (CreateProductRequest request, ApplicationDb db, ICacheInvalidator invalidator) =>
{
var product = new Product { Name = request.Name, Price = request.Price };
db.Products.Add(product);
await db.SaveChangesAsync();
// Invalidate product cache
await invalidator.InvalidateAsync("products:*");
return Results.Created($"/api/products/{product.Id}", product);
});Static File Serving
Compression and Caching
// Response compression: which responses are compressed is set on ResponseCompressionOptions.
builder.Services.AddResponseCompression(options =>
{
    // Compressing HTTPS responses can expose secrets to BREACH-style attacks; enable it only
    // for content that does not mix secrets with attacker-controlled input.
    options.EnableForHttps = true;
    options.MimeTypes = new[]
    {
        "application/javascript",
        "application/json",
        "application/xml",
        "text/css",
        "text/html",
        "text/plain",
        "image/svg+xml"
    };
});

// The compression level is a per-provider setting, not a ResponseCompressionOptions member.
// Optimal trades CPU for size on every dynamic response; prefer Fastest when CPU-bound.
builder.Services.Configure<Microsoft.AspNetCore.ResponseCompression.BrotliCompressionProviderOptions>(
    brotli => brotli.Level = System.IO.Compression.CompressionLevel.Optimal);
builder.Services.Configure<Microsoft.AspNetCore.ResponseCompression.GzipCompressionProviderOptions>(
    gzip => gzip.Level = System.IO.Compression.CompressionLevel.Optimal);
var app = builder.Build();
app.UseResponseCompression();
// Static files with a 30-day public cache and a one-day stale-while-revalidate window.
app.UseStaticFiles(new StaticFileOptions
{
    OnPrepareResponse = ctx =>
    {
        var cacheControl = new Microsoft.Net.Http.Headers.CacheControlHeaderValue
        {
            Public = true,
            MaxAge = TimeSpan.FromDays(30)
        };
        // stale-while-revalidate has no typed property; it is emitted as an extension
        // directive with the window expressed in seconds (1 day).
        cacheControl.Extensions.Add(
            new Microsoft.Net.Http.Headers.NameValueHeaderValue("stale-while-revalidate", "86400"));
        ctx.Context.Response.GetTypedHeaders().CacheControl = cacheControl;

        // No manual ETag: the static file middleware already emits ETag and Last-Modified
        // derived from the file's length and modification time.
    }
});
// Static files with versioning
// Versioned assets under /assets: only files whose name carries a content hash
// (e.g. app.3f2a9c1b.js or app-3f2a9c1b.css) get the one-year cache lifetime.
app.UseStaticFiles(new StaticFileOptions
{
    FileProvider = new PhysicalFileProvider(Path.Combine(AppContext.BaseDirectory, "wwwroot")),
    RequestPath = "/assets",
    OnPrepareResponse = ctx =>
    {
        // A bare '.' check would match every file with an extension, so look for a
        // hex segment of at least 8 characters right before the extension instead.
        var hasContentHash = System.Text.RegularExpressions.Regex.IsMatch(
            ctx.File.Name, @"[.\-][0-9a-fA-F]{8,}\.[^.]+$");
        if (hasContentHash)
        {
            ctx.Context.Response.GetTypedHeaders().CacheControl = new Microsoft.Net.Http.Headers.CacheControlHeaderValue
            {
                Public = true,
                MaxAge = TimeSpan.FromDays(365)
            };
        }
    }
});
Middleware Order Impact
Performance-Critical Middleware Order
var app = builder.Build();
// 1. Exception handling — MUST be first
app.UseExceptionHandler("/error");
app.UseHsts();
// 2. HTTPS redirect
app.UseHttpsRedirection();
// 3. Routing — early routing enables endpoint matching
app.UseRouting();
// 4. Authentication — before authorization
app.UseAuthentication();
// 5. Authorization — after authentication
app.UseAuthorization();
// 6. Response compression — before caching (compresses response)
app.UseResponseCompression();
// 7. Response caching — after compression (cache compressed content)
app.UseResponseCaching();
// 8. Static files — serve directly, bypass app pipeline
app.UseStaticFiles();
// 9. Custom middleware
// Adds an X-ResponseTime header with the time spent in the rest of the pipeline.
app.Use(async (context, next) =>
{
    var stopwatch = Stopwatch.StartNew();

    // Headers are read-only once the response has started, so the value is written from
    // OnStarting (just before headers are flushed) rather than after next() returns.
    context.Response.OnStarting(() =>
    {
        context.Response.Headers["X-ResponseTime"] = $"{stopwatch.ElapsedMilliseconds}ms";
        return Task.CompletedTask;
    });

    await next(context);
});
// 10. Endpoints
app.MapControllers();
app.MapFallbackToFile("index.html"); // SPA fallbackMiddleware Performance Anti-patterns
// ПЛОХО — compression AFTER caching (caches uncompressed)
app.UseResponseCaching();
app.UseResponseCompression();
// ХОРОШО — compression BEFORE caching (caches compressed)
app.UseResponseCompression();
app.UseResponseCaching();
// ПЛОХО — logging EVERY request with string concatenation
app.Use(async (context, next) =>
{
var sw = Stopwatch.StartNew();
await next(context);
sw.Stop();
// String concat in hot path
_logger.LogInformation($"Request {context.Request.Method} {context.Request.Path} took {sw.ElapsedMilliseconds}ms");
});
// ХОРОШО — structured logging
app.Use(async (context, next) =>
{
var sw = Stopwatch.StartNew();
await next(context);
sw.Stop();
// Structured logging — no string concat
_logger.LogRequestDuration(context.Request.Method, context.Request.Path, sw.ElapsedMilliseconds);
});Async All the Way
Async Anti-patterns
// ПЛОХО — .Result blocks thread
public Product GetProduct(int id)
{
return _db.Products.FindAsync(id).Result; // Thread pool starvation!
}
// ПЛОХО — .Wait() blocks thread
public void SaveProduct(Product product)
{
_db.SaveChangesAsync().Wait(); // Thread pool starvation!
}
// ПЛОХО — async void (fire and forget)
public async void ProcessOrder(Order order) // NEVER use async void!
{
await _orderService.ProcessAsync(order);
}
// ПЛОХО — mixing sync and async (sync-over-async)
public IEnumerable<Product> GetAllProducts()
{
// Hidden async → sync conversion
return _db.Products.ToList(); // Blocks!
}Correct Async Patterns
// ХОРОШО — fully async
public async Task<Product?> GetProductAsync(int id, CancellationToken ct = default)
{
return await _db.Products.FindAsync([id], ct);
}
// ХОРОШО — async in controllers
[HttpGet("{id}")]
public async Task<ActionResult<Product>> GetProduct(int id, CancellationToken ct)
{
var product = await _db.Products.FindAsync([id], ct);
return product is not null ? Ok(product) : NotFound();
}
// ХОРОШО — fire and forget with proper handling
public void StartProcessingOrder(Order order)
{
_ = ProcessOrderAsync(order); // Fire and forget with _ prefix
}
private async Task ProcessOrderAsync(Order order)
{
try
{
await _orderService.ProcessAsync(order);
}
catch (Exception ex)
{
// Log error without crashing
_logger.LogError(ex, "Order processing failed for order {OrderId}", order.Id);
}
}Sync-over-Async Detection
// Middleware to detect sync-over-async
public class SyncOverAsyncDetectionMiddleware
{
private readonly RequestDelegate _next;
private readonly ILogger<SyncOverAsyncDetectionMiddleware> _logger;
public SyncOverAsyncDetectionMiddleware(RequestDelegate next, ILogger<SyncOverAsyncDetectionMiddleware> logger)
{
_next = next;
_logger = logger;
}
/// <summary>
/// Samples <see cref="IsAsyncOperation"/> before running the rest of the pipeline and logs
/// a warning for GET requests when that sample was false.
/// </summary>
/// <param name="context">The current HTTP request context.</param>
public async Task InvokeAsync(HttpContext context)
{
// The flag is captured once, before the downstream middleware executes.
var isAsync = IsAsyncOperation();
await _next(context);
if (!isAsync && context.Request.Method == "GET")
{
_logger.LogWarning("Potential sync-over-async detected for {Path}", context.Request.Path);
}
}
/// <summary>Returns true when the calling thread is NOT a thread-pool thread.</summary>
// NOTE(review): ASP.NET Core normally runs the request pipeline on thread-pool threads, so
// this presumably returns false for every request and the warning fires on every GET.
// It observes the thread type, not blocking calls — confirm this heuristic is intended.
private bool IsAsyncOperation()
{
// Check if we're on a thread pool thread
return !Thread.CurrentThread.IsThreadPoolThread;
}
}Advanced Optimization
Endpoint Routing Optimization
// Use MapGet/MapPost instead of controllers for simple endpoints
app.MapGet("/api/health", () => Results.Ok(new { status = "healthy" }));
app.MapPost("/api/health", () => Results.Ok());
// Use minimal APIs for high-throughput endpoints
app.MapGet("/api/products/{id:int}", async (int id, ApplicationDb db) =>
{
var product = await db.Products.FindAsync([id]);
return product is not null ? Results.Ok(product) : Results.NotFound();
});
// Use route constraints for early filtering
app.MapGet("/api/products/{id:guid}", async (Guid id, ApplicationDb db) =>
{
var product = await db.Products.FindAsync([id]);
return product is not null ? Results.Ok(product) : Results.NotFound();
});Buffer Optimization
// Use ArrayPool for temporary buffers
public class BufferOptimizedService
{
/// <summary>
/// Reads <paramref name="stream"/> to the end through a pooled 8 KB chunk buffer and
/// returns everything that was read.
/// </summary>
/// <param name="stream">Readable source stream.</param>
/// <returns>All bytes read from the stream, in order.</returns>
/// <exception cref="ArgumentNullException"><paramref name="stream"/> is null.</exception>
public async Task<byte[]> ReadAndProcessAsync(Stream stream)
{
    ArgumentNullException.ThrowIfNull(stream);

    // The pooled array is rented per call and always returned; nothing is held in a field,
    // so no rented array outlives the operation.
    var chunk = ArrayPool<byte>.Shared.Rent(8192);
    try
    {
        using var collected = new MemoryStream();
        int bytesRead;
        while ((bytesRead = await stream.ReadAsync(chunk.AsMemory())) > 0)
        {
            // Each read overwrites the chunk, so it must be copied out before the next read.
            collected.Write(chunk, 0, bytesRead);
        }
        return collected.ToArray();
    }
    finally
    {
        ArrayPool<byte>.Shared.Return(chunk);
    }
}
}JSON Serialization Optimization
// Use optimized JSON serialization
// Configure System.Text.Json for MVC controllers.
builder.Services.AddControllers()
.AddJsonOptions(options =>
{
// Match incoming property names case-insensitively.
options.JsonSerializerOptions.PropertyNameCaseInsensitive = true;
// Omit null-valued properties from the output.
options.JsonSerializerOptions.DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull;
// Compact output without indentation.
options.JsonSerializerOptions.WriteIndented = false;
// Maximum nesting depth for (de)serialization; this setting is unrelated to buffer pooling.
options.JsonSerializerOptions.MaxDepth = 64;
});
// Use source generators for compile-time serialization
// Program.cs
var builder = WebApplication.CreateBuilder(args);
builder.Services.AddControllers()
.AddJsonOptions(options =>
{
options.JsonSerializerOptions.Converters.Add(new JsonStringEnumConverter());
});
// Use IJsonTypeInfoResolver for custom serialization
builder.Services.Configure<JsonSerializerOptions>(options =>
{
options.TypeInfoResolverChain.Insert(0, AppJsonSerializerContext.Default);
});
[JsonSerializable(typeof(Product))]
[JsonSerializable(typeof(Order))]
[JsonSerializable(typeof(User))]
public partial class AppJsonSerializerContext : JsonSerializerContext { }Практика
Задание 1: Kestrel High-Throughput Configuration
Цель: Настроить Kestrel для 10k RPS.
Шаги:
- Настроить `MaxConcurrentConnections`, `ThreadCount`
- Настроить HTTP/2: `MaxStreamsPerConnection`, keep-alive
- Настроить ThreadPool: `MinThreads`, `WorkStealingMode`
- Протестировать с k6: ramp-up → steady-state → spike
- Измерить P95/P99 latency
Задание 2: Response Caching Strategy
Цель: Реализовать caching с invalidation.
Шаги:
- Добавить `AddResponseCaching()`
- Настроить cache headers для GET endpoints
- Реализовать `ICacheInvalidator` для CRUD operations
- Протестировать: cache hit ratio, invalidation correctness
- Измерить improvement в latency
Задание 3: Sync-over-Async Detection
Цель: Найти и устранить sync-over-async anti-patterns.
Шаги:
- Добавить `SyncOverAsyncDetectionMiddleware`
- Запустить приложение под нагрузкой
- Проанализировать logs — найти sync-over-async calls
- Рефакторить в fully async
- Измерить improvement в thread pool utilization
Контрольные вопросы
- Почему middleware order важен для performance?
- Как .Result и .Wait() влияют на throughput?
- Когда использовать HTTP/2 vs HTTP/1.1?
- Как настроить response caching для dynamic content?
- Какие Kestrel limits критичны для high-throughput?
CPU Profiling и Optimization
Flame Graphs
Что такое Flame Graph
Flame Graph — визуализация CPU profiling данных, где:
- Ширина колонки = % CPU времени, которое функция потребляет
- Высота = depth call stack
- Сверху → callers, снизу → callees
Reading Flame Graphs
Hot Path Detection:
┌─────────────────────────────────────────────────┐
│ Main (100%) │
│ ┌────────────────┐ ┌──────┐ ┌────────┐ │
│ │ Process(70%) │ │Init( │ │Load( │ │
│ │ │ │ 5%) │ │ 25%) │ │
│ │ ┌──────────┐ │ │ │ │ │ │
│ │ │Parse(50%)│ │ │ │ │ │ │
│ │ │ │ │ │ │ │ │ │
│ │ │ParseRaw()│ │ │ │ │ │ │
│ │ └──────────┘ │ │ │ │ │ │
│ └────────────────┘ └──────┘ └────────┘ │
└─────────────────────────────────────────────────┘
Parse() — hottest function (50% CPU)
└─ ParseRaw() — called by Parse()Generating Flame Graphs
# With dotnet-trace: collect a one-minute CPU-sampling trace (duration is dd:hh:mm:ss),
# then convert it to Speedscope format and open the result at https://www.speedscope.app
dotnet-trace collect -p <pid> --profile cpu-sampling --duration 00:00:01:00 --output trace.nettrace
dotnet-trace convert trace.nettrace --format Speedscope
# With perf (Linux)
sudo perf record -g -p <pid> -- sleep 10
sudo perf script | stackcollapse-perf.pl | flamegraph.pl > flamegraph.svg
# With Visual Studio
# 1. Debug → Performance Profiler
# 2. Select "CPU Usage"
# 3. Run → View CPU Usage Report → Flame GraphFlame Graph Analysis
Key metrics from flame graph:
1. Self Time — CPU time spent IN the function (not in callees)
→ High self time = function is computationally expensive
2. Inclusive Time — CPU time including callees
→ High inclusive time = function or its callees are expensive
3. Call Path — how we got to this function
→ Look for deep call stacks with high inclusive time
4. Leaf Functions — functions with no children
→ Hot leaf functions are the actual bottleneckJIT Inlining
JIT Inlining Decisions
JIT inlines methods when:
- Method is small (< 256 bytes IL typically)
- No virtual calls (devirtualization possible)
- No exceptions
- No loops (sometimes)
- Not marked with `[MethodImpl(MethodImplOptions.NoInlining)]`
Aggressive Inlining
using System.Runtime.CompilerServices;
public class OptimizedService
{
// Hint to JIT: inline this method aggressively
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public int FastAdd(int a, int b) => a + b;
// Without hint — JIT decides
public int RegularAdd(int a, int b) => a + b;
// Prevent inlining
[MethodImpl(MethodImplOptions.NoInlining)]
public int NoInlineAdd(int a, int b) => a + b;
}When to Use AggressiveInlining
// GOOD — small accessor
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public int GetCount() => _count;
// GOOD — simple math
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public double CalculateArea(double radius) => Math.PI * radius * radius;
// GOOD — delegate wrapper
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public Task<T> ExecuteAsync<T>(Func<Task<T>> func) => func();
// BAD — complex logic (increases code size, hurts instruction cache)
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public string ComplexProcess(string input)
{
var parts = input.Split(',');
var result = new StringBuilder();
foreach (var part in parts)
{
result.Append(part.Trim().ToUpper());
if (result.Length > 0) result.Append(' ');
}
return result.ToString();
}Devirtualization
// JIT can devirtualize when:
// 1. Method is sealed
// 2. Method is override but class is sealed
// 3. Virtual call on local variable of concrete type
// DEVIRTUALIZABLE — sealed class
/// <summary>
/// Sealed implementation of <c>IService</c>. No subclass can exist, so a call made through
/// a <c>SealedService</c>-typed reference has a single known target.
/// </summary>
public sealed class SealedService : IService
{
    // Not declared 'virtual': a sealed class cannot introduce new virtual members (CS0549).
    public string Process() => "sealed";
}
// DEVIRTUALIZABLE — override in sealed class
public class BaseService : IService
{
public virtual string Process() => "base";
}
public sealed class DerivedService : BaseService
{
public override string Process() => "derived";
}
// NOT devirtualizable — virtual call through interface
public class Client
{
public string Call(IService service)
{
// JIT can't know concrete type
return service.Process(); // Virtual call
}
}
// DEVIRTUALIZABLE — concrete type known
public class Client2
{
public string Call()
{
var service = new DerivedService();
return service.Process(); // Inlined!
}
}JIT Inlining Verification
# Enable JIT dump to verify inlining
set CORECLR_ENABLE_PROFILING=1
set CORECLR_JitDump=*
set COMPlus_JitDump=*
# Or programmatically
AppContext.SetSwitch("System.Diagnostics.Trace.IsEnabled", true);
Environment.SetEnvironmentVariable("COMPlus_JitDump", "*");
# Check output for [InliningAttempt] and [InliningSuccess]
# Example:
# [InliningAttempt] MyNamespace.MyClass::FastAdd(int, int)
# [InliningSuccess] MyNamespace.MyClass::FastAdd(int, int)SIMD Operations
Vector Basics
using System.Numerics;
public class SimdBenchmark
{
// Scalar addition
/// <summary>Adds up all elements of <paramref name="data"/> one at a time.</summary>
/// <param name="data">Values to sum.</param>
/// <returns>The (wrapping) 32-bit sum; 0 for an empty array.</returns>
public int ScalarSum(int[] data)
{
    int total = 0;
    foreach (int value in data)
    {
        total += value;
    }
    return total;
}
// SIMD addition
/// <summary>
/// Sums <paramref name="data"/> using <see cref="Vector{T}"/> lanes, with a scalar loop for
/// the elements that do not fill a whole vector.
/// </summary>
/// <param name="data">Values to sum.</param>
/// <returns>The (wrapping) 32-bit sum; 0 for an empty array.</returns>
/// <exception cref="ArgumentNullException"><paramref name="data"/> is null.</exception>
public int VectorizedSum(int[] data)
{
    ArgumentNullException.ThrowIfNull(data);

    // Lane count depends on the hardware (e.g. 4 with 128-bit, 8 with 256-bit registers).
    int lanes = Vector<int>.Count;
    var accumulator = Vector<int>.Zero;
    int i = 0;

    // Accumulate lane-wise; the horizontal reduction happens once, after the loop.
    for (; i + lanes <= data.Length; i += lanes)
    {
        accumulator += new Vector<int>(data, i);
    }

    int sum = Vector.Sum(accumulator);

    // Tail — remaining elements
    for (; i < data.Length; i++)
    {
        sum += data[i];
    }
    return sum;
}
}Vector256 / Avx
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;
public class AvxService
{
// Check hardware support
public bool IsAvxSupported() => Avx.IsSupported;
// AVX float multiplication
/// <summary>
/// Element-wise product: <c>result[i] = a[i] * b[i]</c> for every index of
/// <paramref name="a"/>, using 256-bit vectors when the hardware accelerates them.
/// </summary>
/// <param name="a">Left operand; its length defines how many elements are processed.</param>
/// <param name="b">Right operand; must be at least as long as <paramref name="a"/>.</param>
/// <param name="result">Destination; must be at least as long as <paramref name="a"/>.</param>
/// <exception cref="ArgumentNullException">Any argument is null.</exception>
/// <exception cref="ArgumentException"><paramref name="b"/> or <paramref name="result"/> is too short.</exception>
public void AvxMultiply(float[] a, float[] b, float[] result)
{
    ArgumentNullException.ThrowIfNull(a);
    ArgumentNullException.ThrowIfNull(b);
    ArgumentNullException.ThrowIfNull(result);
    if (b.Length < a.Length || result.Length < a.Length)
    {
        throw new ArgumentException("b and result must be at least as long as a.");
    }

    int i = 0;

    // The cross-platform Vector256 API uses AVX on x86 and falls through to the scalar
    // loop below when 256-bit vectors are not hardware accelerated.
    if (Vector256.IsHardwareAccelerated)
    {
        int width = Vector256<float>.Count; // 8 floats per 256-bit vector
        for (; i + width <= a.Length; i += width)
        {
            var product = Vector256.Create(a, i) * Vector256.Create(b, i);
            product.CopyTo(result, i);
        }
    }

    // Tail (or the whole array when no vector path is available)
    for (; i < a.Length; i++)
    {
        result[i] = a[i] * b[i];
    }
}
}SIMD String Processing
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;
public class SimdStringProcessor
{
// Fast character classification using SIMD
/// <summary>
/// Counts the characters of <paramref name="input"/> for which <see cref="char.IsUpper(char)"/>
/// is true. Blocks of pure-ASCII text are classified with 128-bit vectors; any block that
/// contains a non-ASCII character is classified per character so both paths agree.
/// </summary>
/// <param name="input">Text to scan; null or empty yields 0.</param>
/// <returns>The number of uppercase characters.</returns>
public static int CountUppercase(string input)
{
    if (string.IsNullOrEmpty(input)) return 0;

    ReadOnlySpan<char> text = input;
    int count = 0;
    int i = 0;

    int width = Vector128<ushort>.Count; // 8 UTF-16 code units per vector
    if (Vector128.IsHardwareAccelerated && text.Length >= width)
    {
        // .NET strings are UTF-16, so characters are processed as 16-bit lanes, not bytes.
        ReadOnlySpan<ushort> units = System.Runtime.InteropServices.MemoryMarshal.Cast<char, ushort>(text);
        var upperA = Vector128.Create((ushort)'A');
        var upperZ = Vector128.Create((ushort)'Z');
        var asciiMax = Vector128.Create((ushort)0x7F);

        for (; i + width <= units.Length; i += width)
        {
            var block = Vector128.Create(units.Slice(i, width));

            if (Vector128.GreaterThanAny(block, asciiMax))
            {
                // Non-ASCII present: defer to char.IsUpper for this block.
                for (int j = i; j < i + width; j++)
                {
                    if (char.IsUpper(text[j])) count++;
                }
                continue;
            }

            // For ASCII, uppercase means 'A' <= c <= 'Z' (inclusive on both ends).
            var isUpper = Vector128.GreaterThanOrEqual(block, upperA)
                        & Vector128.LessThanOrEqual(block, upperZ);
            count += System.Numerics.BitOperations.PopCount(isUpper.ExtractMostSignificantBits());
        }
    }

    // Tail
    for (; i < text.Length; i++)
    {
        if (char.IsUpper(text[i])) count++;
    }
    return count;
}
}Span Vectorization
public class SpanVectorizedService
{
// MemorySet with SIMD
/// <summary>Sets every byte of <paramref name="span"/> to <paramref name="value"/>.</summary>
/// <param name="span">Destination memory; an empty span is a no-op.</param>
/// <param name="value">Byte written to every position.</param>
public static void MemorySet(Span<byte> span, byte value)
{
    // Span<T>.Fill is the standard-library primitive for this; hand-written intrinsics
    // are unnecessary here.
    span.Fill(value);
}
// MemoryCompare with SIMD
/// <summary>Compares two byte sequences for equal length and identical content.</summary>
/// <param name="a">First sequence.</param>
/// <param name="b">Second sequence.</param>
/// <returns>True when both spans have the same length and the same bytes.</returns>
public static bool MemoryEquals(ReadOnlySpan<byte> a, ReadOnlySpan<byte> b)
{
    // SequenceEqual performs the length check and the element comparison in one call.
    return a.SequenceEqual(b);
}
}Struct vs Class Performance
Performance Trade-offs
| Factor | Struct | Class |
|---|---|---|
| Allocation | Stack (or inline) | Heap |
| Copy cost | Shallow copy (fast for small) | Reference copy (free) |
| GC pressure | None (if small) | Gen 0/1/2 |
| Nullability | Can't be null (unless nullable) | Can be null |
| Inheritance | No inheritance (except ValueType) | Full inheritance |
| Size | Small (< 16 bytes ideal) | Any size |
| Boxing | Yes, when cast to object | No |
When to Use Struct
// GOOD — small, immutable, value semantics
[StructLayout(LayoutKind.Sequential)]
public readonly record struct Point3D(double X, double Y, double Z);
// GOOD — frequently allocated, short-lived
public readonly record struct Money(decimal Amount, string Currency);
// GOOD — used in collections (avoids GC)
/// <summary>
/// Immutable cache entry stored inline in collections (no separate heap object per entry).
/// </summary>
public readonly struct CacheEntry
{
    public readonly string Key;
    public readonly object Value;
    public readonly DateTime CreatedAt;

    /// <summary>Creates an entry; readonly fields can only be assigned here.</summary>
    /// <param name="key">Cache key.</param>
    /// <param name="value">Cached value.</param>
    /// <param name="createdAt">Creation timestamp.</param>
    public CacheEntry(string key, object value, DateTime createdAt)
    {
        Key = key;
        Value = value;
        CreatedAt = createdAt;
    }
}
// BAD — large struct (copy cost > allocation cost)
public struct LargeData // ~2KB — DON'T DO THIS
{
public byte[] Data;
public string Name;
public List<int> Items;
public DateTime Timestamp;
}Struct Performance Benchmark
using BenchmarkDotNet.Attributes;
[MemoryDiagnoser]
public class StructVsClassBenchmark
{
private List<Point> _structList;
private List<PointClass> _classList;
private const int Count = 1_000_000;
[GlobalSetup]
public void Setup()
{
_structList = new List<Point>(Count);
_classList = new List<PointClass>(Count);
for (int i = 0; i < Count; i++)
{
_structList.Add(new Point(i, i));
_classList.Add(new PointClass { X = i, Y = i });
}
}
[Benchmark]
public List<Point> CreateStructures()
{
var list = new List<Point>(Count);
for (int i = 0; i < Count; i++)
list.Add(new Point(i, i));
return list;
}
[Benchmark]
public List<PointClass> CreateClasses()
{
var list = new List<PointClass>(Count);
for (int i = 0; i < Count; i++)
list.Add(new PointClass { X = i, Y = i });
return list;
}
}
public readonly record struct Point(int X, int Y);
public class PointClass { public int X; public int Y; }
// Expected results (approximate, 64-bit runtime; timings vary by machine):
// | Method | Mean | Allocated | Alloc Ratio |
// |----------------|-----------|-----------|-------------|
// | CreateStructures | ~8ms | ~8 MB | 0.25x |
// | CreateClasses | ~15ms | ~32 MB | 1.0x |
// ← Structs: less allocation, faster GCПрактика
Задание 1: Hot Path Optimization через Inlining
Сценарий: Service с hot path в CalculateMetrics().
Шаги:
- Запустить benchmark для `CalculateMetrics()`
- Проанализировать flame graph — найти hottest function
- Добавить `[MethodImpl(MethodImplOptions.AggressiveInlining)]`
- Проверить devirtualization — сделать class sealed
- Измерить improvement
Задание 2: SIMD-based Computation
Сценарий: Матричные операции для ML inference.
Шаги:
- Реализовать scalar matrix multiply
- Реализовать AVX-optimized version
- Benchmark: 1024x1024 matrix multiply
- Сравнить: scalar vs SIMD
- Добавить fallback для систем без AVX
Задание 3: Struct vs Class Analysis
Сценарий: Collection of 10M short-lived objects.
Шаги:
- Создать class-based implementation
- Создать struct-based implementation
- Benchmark allocations + GC pressure
- Измерить impact on GC pause time
- Document findings
Контрольные вопросы
- Как читать flame graph?
- Когда JIT inlines method?
- Когда использовать SIMD?
- Когда struct vs class?
- Как verify JIT inlining?
Memory Profiling Advanced
Allocation Profiling
Identifying Allocation Hotspots
// Common allocation anti-patterns
// Anti-pattern 1: LINQ creates intermediate allocations
public List<string> BadApproach(IEnumerable<string> items)
{
// Creates: Where iterator + Select iterator + List
return items.Where(x => x.Length > 5).Select(x => x.ToUpper()).ToList();
}
// Optimized: Single-pass, no intermediates
/// <summary>
/// Builds the upper-cased list of all items longer than five characters in a single pass,
/// without LINQ iterators.
/// </summary>
/// <param name="items">Source strings.</param>
/// <returns>Upper-cased copies of the qualifying items, in source order.</returns>
public List<string> GoodApproach(IEnumerable<string> items)
{
    var upperCased = new List<string>();
    foreach (var candidate in items)
    {
        if (candidate.Length <= 5)
        {
            continue;
        }
        upperCased.Add(candidate.ToUpper());
    }
    return upperCased;
}
// Anti-pattern 2: String concatenation in loop
public string BadConcat(IEnumerable<string> parts)
{
var result = "";
foreach (var part in parts)
result += part + " "; // O(n²) — each += creates new string
return result.Trim();
}
// Optimized: StringBuilder
public string GoodConcat(IEnumerable<string> parts)
{
var sb = new StringBuilder();
foreach (var part in parts)
sb.Append(part).Append(' ');
return sb.ToString().Trim();
}Allocation Tracking
// Enable allocation tracking in production
builder.Services.Configure<ConsoleLifetimeOptions>(options =>
{
// Log allocations on each request
app.Use(async (context, next) =>
{
var startGen0 = GC.CollectionCount(0);
var startGen1 = GC.CollectionCount(1);
var startGen2 = GC.CollectionCount(2);
var startBytes = GC.GetTotalMemory(false);
await next(context);
var endGen0 = GC.CollectionCount(0);
var endGen1 = GC.CollectionCount(1);
var endGen2 = GC.CollectionCount(2);
var endBytes = GC.GetTotalMemory(false);
var allocated = endBytes - startBytes;
var gen0Allocs = endGen0 - startGen0;
if (gen0Allocs > 10 || allocated > 1024 * 1024) // >1MB
{
_logger.LogWarning(
"High allocation detected: {Allocated} bytes, Gen0: {Gen0Allocs}",
allocated, gen0Allocs);
}
});
});Span for Zero-Allocation Parsing
public class ZeroAllocationParser
{
// Allocating: string.Split creates array of strings
public string[] AllocatingSplit(string input)
{
return input.Split(','); // Creates array + string objects
}
// Zero-allocation: Span<char>.Split
/// <summary>
/// Splits <paramref name="input"/> on commas without allocating: the position of each
/// token is written to <paramref name="output"/> as a <see cref="Range"/> into the input.
/// </summary>
/// <param name="input">Text to split.</param>
/// <param name="output">Receives token ranges; tokens beyond its length are dropped.</param>
/// <returns>The number of ranges written (at most <c>output.Length</c>).</returns>
public int ZeroAllocSplit(ReadOnlySpan<char> input, Span<Range> output)
{
    int count = 0;
    int start = 0;
    // i == input.Length acts as a virtual trailing separator that closes the last token.
    for (int i = 0; i <= input.Length; i++)
    {
        if (i == input.Length || input[i] == ',')
        {
            if (count < output.Length)
                output[count++] = start..i;
            start = i + 1;
        }
    }
    return count;
}

/// <summary>Three-field CSV row.</summary>
public record CsvRecord(string Field1, string Field2, string Field3);

/// <summary>
/// Parses the first three comma-separated fields of <paramref name="line"/>. Splitting is
/// allocation-free; only the resulting record and its three strings are allocated.
/// </summary>
/// <param name="line">One CSV line without quoting or escaping.</param>
/// <returns>The parsed record, or null when the line has fewer than three fields.</returns>
public CsvRecord? ParseCsv(ReadOnlySpan<char> line)
{
    Span<Range> fields = stackalloc Range[3];
    var count = ZeroAllocSplit(line, fields);
    if (count < 3) return null;
    return new CsvRecord(
        line[fields[0]].ToString(),
        line[fields[1]].ToString(),
        line[fields[2]].ToString());
}
}GC Pause Time Analysis
GC Modes and Pause Behavior
// GC latency modes (System.Runtime.GCSettings.LatencyMode)

// Batch — concurrent (background) GC disabled: highest throughput, longest pauses
GCSettings.LatencyMode = GCLatencyMode.Batch;

// Interactive — the default when concurrent GC is enabled: balances pauses and throughput
GCSettings.LatencyMode = GCLatencyMode.Interactive;

// LowLatency — suppresses gen 2 collections; only for short critical sections (workstation GC)
GCSettings.LatencyMode = GCLatencyMode.LowLatency;

// SustainedLowLatency — avoids blocking gen 2 collections for long-running low-latency work
GCSettings.LatencyMode = GCLatencyMode.SustainedLowLatency;

// NoGCRegion — cannot be assigned to LatencyMode (the setter throws). It is entered with
// GC.TryStartNoGCRegion for a bounded allocation budget and left with GC.EndNoGCRegion.
// DANGEROUS: exceeding the budget ends the region and a GC may run anyway.
if (GC.TryStartNoGCRegion(totalSize: 16 * 1024 * 1024))
{
    try
    {
        // Allocation-bounded critical section...
    }
    finally
    {
        // The runtime may already have left the region if the budget was exceeded.
        if (GCSettings.LatencyMode == GCLatencyMode.NoGCRegion)
            GC.EndNoGCRegion();
    }
}
GC Pause Monitoring
// EventSource for GC events
public class GcMonitor : IDisposable
{
private readonly EventSource? _gcEventSource;
private readonly ILogger<GcMonitor> _logger;
public GcMonitor(ILogger<GcMonitor> logger)
{
_logger = logger;
// .NET 5+ has built-in GC event source
_gcEventSource = EventSource.GetEvents<GCEventSource>()?.CreateEventSource();
if (_gcEventSource != null)
{
_gcEventSource.EventWritten += OnGcEvent;
}
}
private void OnGcEvent(object? sender, EventWrittenEventArgs e)
{
// GC collection started
if (e.EventName == "GcStart")
{
var gen = GetGen(e.Payload);
_logger.LogDebug("GC Gen{Gen} started", gen);
}
// GC collection ended
if (e.EventName == "GcEnd")
{
var gen = GetGen(e.Payload);
var duration = GetDuration(e.Payload);
_logger.LogDebug("GC Gen{Gen} ended, duration: {Duration}ms", gen, duration);
}
}
private int GetGen(IEnumerable<object?> payload) => 0;
private double GetDuration(IEnumerable<object?> payload) => 0;
public void Dispose()
{
_gcEventSource?.EventWritten -= OnGcEvent;
}
}Minimizing GC Pauses
// Strategy 1: Object Pooling for frequent allocations
public class GcOptimizedService
{
private readonly ObjectPool<ByteArrayHolder> _pool;
public GcOptimizedService()
{
_pool = new DefaultObjectPool<ByteArrayHolder>(
new ByteArrayHolderPolicy(), 1000);
}
public void ProcessData(byte[] data)
{
var holder = _pool.Get();
try
{
holder.Buffer = data;
// Process...
}
finally
{
_pool.Return(holder);
}
}
}
public class ByteArrayHolder
{
public byte[]? Buffer;
}
/// <summary>Pool policy for <see cref="ByteArrayHolder"/>: creates empty holders and clears them on return.</summary>
public class ByteArrayHolderPolicy : PooledObjectPolicy<ByteArrayHolder>
{
    /// <summary>Creates a new, empty holder when the pool has none available.</summary>
    public override ByteArrayHolder Create() => new();

    /// <summary>
    /// Drops the buffer reference so a pooled holder does not keep caller data alive.
    /// </summary>
    /// <returns>Always true: the holder may be kept in the pool.</returns>
    public override bool Return(ByteArrayHolder obj)
    {
        obj.Buffer = null;
        return true;
    }
}
// Strategy 2: Pre-allocate collections
public class PreAllocatedService
{
private List<MyObject> _cache = new(10000);
private Dictionary<string, MyObject> _lookup = new(10000);
public void Add(MyObject obj)
{
_cache.Add(obj);
_lookup[obj.Key] = obj;
}
}
// Strategy 3: Use ArrayPool for temporary buffers
public class BufferOptimizedService
{
/// <summary>
/// Drains <paramref name="stream"/> through a pooled 64 KB buffer and reports how many
/// bytes were read.
/// </summary>
/// <param name="stream">Readable source stream.</param>
/// <returns>The total number of bytes read.</returns>
public async Task<int> ProcessAsync(Stream stream)
{
    // Rent from pool instead of allocating; the array may be larger than requested.
    var buffer = ArrayPool<byte>.Shared.Rent(65536);
    try
    {
        int totalRead = 0;
        int bytesRead;
        while ((bytesRead = await stream.ReadAsync(buffer.AsMemory())) > 0)
        {
            // Process buffer[0..bytesRead]
            totalRead += bytesRead;
        }
        return totalRead;
    }
    finally
    {
        ArrayPool<byte>.Shared.Return(buffer);
    }
}
}Object Graph Analysis
Finding Retention Paths
// Common retention patterns
// Pattern 1: Static collections holding references
public class StaticCache
{
// BAD — static list holds ALL objects forever
private static readonly List<CachedItem> _cache = new();
public static void Add(CachedItem item) => _cache.Add(item);
public static IReadOnlyList<CachedItem> GetAll() => _cache;
}
// GOOD — bounded cache with eviction
/// <summary>
/// Size-bounded cache that evicts entries in insertion (FIFO) order once the limit is
/// exceeded. Under concurrent writers the bound is approximate.
/// </summary>
public class BoundedCache
{
    private readonly ConcurrentDictionary<string, CachedItem> _cache = new();
    // Keys in the order they were first added; the head is the oldest entry.
    private readonly ConcurrentQueue<string> _insertionOrder = new();
    private readonly int _maxSize;

    /// <param name="maxSize">Maximum number of entries to retain; must be at least 1.</param>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="maxSize"/> is below 1.</exception>
    public BoundedCache(int maxSize = 10000)
    {
        if (maxSize < 1) throw new ArgumentOutOfRangeException(nameof(maxSize));
        _maxSize = maxSize;
    }

    /// <summary>Adds or replaces an entry, evicting the oldest entries if the cache is over its limit.</summary>
    /// <param name="key">Entry key.</param>
    /// <param name="item">Entry value.</param>
    public void Add(string key, CachedItem item)
    {
        if (_cache.TryAdd(key, item))
        {
            _insertionOrder.Enqueue(key);
        }
        else
        {
            // Replacing an existing key keeps its original position in the eviction order.
            _cache[key] = item;
        }

        // Evict oldest: O(1) per eviction instead of sorting every key on each insert.
        while (_cache.Count > _maxSize && _insertionOrder.TryDequeue(out var oldest))
        {
            _cache.TryRemove(oldest, out _);
        }
    }
}
// Pattern 2: Event handler leaks
public class EventLeakExample
{
// BAD — event handler keeps subscriber alive
public event EventHandler<DataEvent>? DataReceived;
public void Subscribe(object subscriber)
{
// subscriber can NEVER be GC'd while publisher lives
DataReceived += OnDataReceived;
}
}
// GOOD — weak event pattern
public class WeakEventExample
{
private readonly List<WeakReference<EventHandler<DataEvent>>> _handlers = new();
public void Subscribe(EventHandler<DataEvent> handler)
{
_handlers.Add(new WeakReference<EventHandler<DataEvent>>(handler));
}
public void OnData(DataEvent data)
{
// Clean up dead references
_handlers.RemoveAll(wr => !wr.TryGetTarget(out _));
foreach (var wr in _handlers)
{
if (wr.TryGetTarget(out var handler))
handler(this, data);
}
}
}
// Pattern 3: CancellationTokenSource leak
public class CancellationLeak
{
private readonly List<CancellationTokenSource> _ctsList = new();
public async Task StartLongRunningOperationAsync()
{
var cts = new CancellationTokenSource();
_ctsList.Add(cts);
_ = Task.Run(async () =>
{
try
{
await DoWorkAsync(cts.Token);
}
finally
{
// Clean up
_ctsList.Remove(cts);
cts.Dispose();
}
});
}
}Finalizer Queue Analysis
// Monitor finalizer queue
/// <summary>Prints finalization and per-generation heap figures from the most recent GC.</summary>
public class FinalizerMonitor
{
    /// <summary>
    /// Writes a warning when more than 1000 objects were pending finalization at the last
    /// GC, followed by the size of each generation and the large object heap.
    /// </summary>
    public static void Report()
    {
        // One snapshot is used for every figure so the numbers describe the same GC.
        var memoryInfo = GC.GetGCMemoryInfo();

        // Objects that were ready for finalization when the last GC ran
        var finalizersPending = memoryInfo.FinalizationPendingCount;
        if (finalizersPending > 1000)
        {
            Console.WriteLine($"WARNING: {finalizersPending} objects in finalizer queue");
        }

        // GenerationInfo is indexed gen 0, gen 1, gen 2, LOH, POH.
        var generations = memoryInfo.GenerationInfo;
        Console.WriteLine($"Gen 0: {generations[0].SizeAfterBytes} bytes");
        Console.WriteLine($"Gen 1: {generations[1].SizeAfterBytes} bytes");
        Console.WriteLine($"Gen 2: {generations[2].SizeAfterBytes} bytes");
        Console.WriteLine($"LOH: {generations[3].SizeAfterBytes} bytes");
    }
}
// Avoid unnecessary finalizers
/// <summary>
/// Disposable type that deliberately declares no finalizer, so its instances are
/// never placed on the finalization queue.
/// </summary>
public class NoFinalizer : IDisposable
{
    private bool _disposed;

    /// <summary>Marks the instance as disposed; calling it more than once is harmless.</summary>
    public void Dispose()
    {
        GC.SuppressFinalize(this);
        _disposed = true;
    }
}
Handle Leaks
Common Handle Leak Patterns
// Pattern 1: FileStream not disposed
/// <summary>
/// BAD example (kept leaky on purpose): reads a whole file but never disposes the stream.
/// </summary>
/// <param name="path">File to read.</param>
/// <returns>The file content.</returns>
public async Task<string> ReadFileAsync(string path)
{
    // BAD — neither the stream nor the reader is disposed, on success or on failure;
    // the OS handle stays open until the finalizer eventually runs.
    var stream = new FileStream(path, FileMode.Open);
    var content = await new StreamReader(stream).ReadToEndAsync();
    return content; // If exception → stream leak!
}
// GOOD — using statement
/// <summary>Reads a whole file and releases the file handle on every code path.</summary>
/// <param name="path">File to read.</param>
/// <returns>The file content.</returns>
public async Task<string> ReadFileGoodAsync(string path)
{
    // FileAccess.Read: the two-argument FileStream constructor asks for ReadWrite,
    // which fails on read-only files. useAsync makes the awaited read real async I/O.
    await using var stream = new FileStream(
        path, FileMode.Open, FileAccess.Read, FileShare.Read, bufferSize: 4096, useAsync: true);
    using var reader = new StreamReader(stream);
    return await reader.ReadToEndAsync();
}
// Pattern 2: Database connection leak
/// <summary>
/// BAD example (kept leaky on purpose): opens a connection, command and reader and
/// disposes none of them.
/// </summary>
/// <returns>An empty list; the reader is never consumed.</returns>
public async Task<List<User>> GetUsersAsync()
{
    // BAD — connection not disposed, so it is never returned to the pool
    var connection = new SqlConnection("...");
    await connection.OpenAsync();
    var command = new SqlCommand("SELECT * FROM Users", connection);
    var reader = await command.ExecuteReaderAsync();
    // reader not disposed!
    return new List<User>();
}
// GOOD — using statements
/// <summary>Loads all users, disposing the connection, command and reader on every path.</summary>
/// <returns>One <c>User</c> per row, populated with its name.</returns>
public async Task<List<User>> GetUsersGoodAsync()
{
    await using var connection = new SqlConnection("...");
    await connection.OpenAsync();

    // Name the column explicitly: GetString(0) below relies on column order,
    // which SELECT * does not guarantee once the table changes.
    await using var command = new SqlCommand("SELECT Name FROM Users", connection);
    await using var reader = await command.ExecuteReaderAsync();

    var users = new List<User>();
    while (await reader.ReadAsync())
    {
        users.Add(new User { Name = reader.GetString(0) });
    }

    return users;
}
// Pattern 3: Timer not stopped
/// <summary>
/// BAD example: starts a one-second timer in the constructor and offers no way to
/// stop or dispose it.
/// </summary>
public class TimerLeak
{
    private readonly System.Timers.Timer _timer;

    public TimerLeak()
    {
        var ticker = new System.Timers.Timer(1000);
        ticker.Elapsed += OnTick;
        _timer = ticker;
        _timer.Start();
        // Never stopped → memory leak!
    }

    // Tick callback; intentionally empty in this example.
    private void OnTick(object? sender, EventArgs e)
    {
    }
}
/// <summary>
/// Owns a one-second timer and releases it in <see cref="Dispose"/>.
/// </summary>
public class TimerNoLeak : IDisposable
{
    private readonly System.Timers.Timer _timer;
    private bool _disposed;

    public TimerNoLeak()
    {
        var ticker = new System.Timers.Timer(1000);
        ticker.Elapsed += OnTick;
        _timer = ticker;
        _timer.Start();
    }

    // Tick callback; intentionally empty in this example.
    private void OnTick(object? sender, EventArgs e)
    {
    }

    /// <summary>Stops and disposes the timer; later calls do nothing.</summary>
    public void Dispose()
    {
        if (_disposed)
        {
            return;
        }

        _timer.Stop();
        _timer.Dispose();
        _disposed = true;
    }
}
Handle Leak Detection
// Monitor handles
/// <summary>
/// Warns when the process handle count grows by more than a threshold between two checks.
/// </summary>
public class HandleMonitor
{
    // Growth between two consecutive checks that triggers a warning.
    private const int LeakThreshold = 100;

    private int _lastHandleCount;
    private bool _hasBaseline;

    /// <summary>
    /// Compares the current handle count with the previous check. The first call only
    /// records a baseline, because there is nothing to compare against yet.
    /// </summary>
    public void CheckForLeaks()
    {
        int currentHandles;

        // Process wraps an OS handle itself; dispose it so the monitor does not leak.
        using (var process = Process.GetCurrentProcess())
        {
            currentHandles = process.HandleCount;
        }

        if (_hasBaseline)
        {
            var delta = currentHandles - _lastHandleCount;
            if (delta > LeakThreshold)
            {
                Console.WriteLine($"WARNING: {delta} new handles since last check");
            }
        }

        _lastHandleCount = currentHandles;
        _hasBaseline = true;
    }
}
// Inspect the heap with the SOS dumpheap command inside `dotnet-dump analyze`
// > dumpheap -stat -type FileStream
// Compare successive dumps and look for types whose instance count keeps growing
Практика
Задание 1: Memory Leak через Heap Dump
Сценарий: Приложение растёт с 100MB до 2GB за 24 часа.
Шаги:
- dotnet gcdump collect -p <pid> -o before.gcdump
- Запустить приложение под нагрузкой 2 часа
- dotnet gcdump collect -p <pid> -o after.gcdump
- dotnet gcdump report after.gcdump --top-by-count 50
- Найти тип с максимальным ростом
- dotnet gcdump report after.gcdump --roots --type <leaked-type>
- Найти и устранить leak
Задание 2: GC Pause Optimization
Сценарий: API с P99 latency spikes каждые 30 секунд.
Шаги:
- dotnet counters monitor -p <pid> --counters System.Runtime
- Correlate GC collections with latency spikes
- Найти allocation hotspot (dotnet-trace)
- Оптимизировать: object pooling, pre-allocation, span
- Измерить improvement в P99 latency
Задание 3: Allocation-Free Hot Path
Сценарий: Serialization endpoint создаёт 5MB allocations per request.
Шаги:
- Benchmark serialization
- Найти allocation sources (dotnet-gcdump)
- Оптимизировать:
  - Use Span<char> for parsing
  - Use ArrayPool<byte> for buffers
  - Pre-allocate JSON serializers
  - Avoid LINQ intermediates
- Verify: < 100KB allocations per request
Контрольные вопросы
- Как найти allocation hotspot?
- Как минимизировать GC pause time?
- Как найти memory leak?
- Какие common handle leaks?
- Что такое finalizer queue и почему это плохо?
Load Testing
k6 — Script Writing
Installation
# Install k6
# Windows (winget)
winget install k6 --source winget
# macOS (brew)
brew install k6
# Linux (Debian/Ubuntu, apt)
# Import the k6 package signing key into a dedicated keyring, then register the
# repository that is signed with it.
sudo gpg -k
sudo gpg --no-default-keyring --keyring /usr/share/keyrings/k6-archive-keyring.gpg --keyserver hkp://keyserver.ubuntu.com:80 --recv-keys C5AD17C747E3415A3642D57D77C6C491D6AC1D69
echo "deb [signed-by=/usr/share/keyrings/k6-archive-keyring.gpg] https://dl.k6.io/deb stable main" | sudo tee /etc/apt/sources.list.d/k6.list
sudo apt-get update
sudo apt-get install k6
Basic k6 Script
// basic.js
import http from 'k6/http';
import { check, sleep } from 'k6';
export const options = {
stages: [
{ duration: '30s', target: 100 }, // ramp-up to 100 VUs
{ duration: '2m', target: 100 }, // stay at 100 VUs
{ duration: '30s', target: 200 }, // ramp-up to 200 VUs
{ duration: '2m', target: 200 }, // stay at 200 VUs
{ duration: '30s', target: 0 }, // ramp-down to 0
],
};
export default function () {
const res = http.get('https://api.example.com/products');
check(res, {
'status is 200': (r) => r.status === 200,
'response time < 200ms': (r) => r.timings.duration < 200,
'has products': (r) => JSON.parse(r.body).length > 0,
});
sleep(1); // 1 second think time
}Running k6
# Run basic script
k6 run basic.js
# Run with local output
k6 run --out json=results.json basic.js
# Run with cloud reporting
k6 cloud basic.js
# Run with custom VUs
k6 run --vus 50 --duration 2m basic.js
# Validate the script without generating load: `k6 run` has no dry-run flag;
# `k6 inspect` parses the script and prints its resolved options
k6 inspect basic.js
k6 Thresholds
Defining Thresholds
// thresholds.js
import http from 'k6/http';
import { check, sleep } from 'k6';
export const options = {
stages: [
{ duration: '1m', target: 100 },
{ duration: '5m', target: 100 },
{ duration: '1m', target: 200 },
{ duration: '5m', target: 200 },
],
thresholds: {
// HTTP request duration thresholds
'http_req_duration': [
'p(50)<100', // 50th percentile < 100ms
'p(90)<300', // 90th percentile < 300ms
'p(95)<500', // 95th percentile < 500ms
'p(99)<1000', // 99th percentile < 1000ms
'avg<200', // average < 200ms
],
// Error rate threshold
'http_req_failed': [
'rate<0.01', // fail rate < 1%
],
// Custom metrics
'user_logins': [
'count>1000', // at least 1000 logins
],
},
};
export default function () {
const res = http.get('https://api.example.com/products');
check(res, {
'status is 200': (r) => r.status === 200,
'response time < 500ms': (r) => r.timings.duration < 500,
});
sleep(1);
}Custom Metrics
// custom-metrics.js
import http from 'k6/http';
import { check, sleep } from 'k6';
import { Counter, Gauge, Rate, Trend } from 'k6/metrics';
// Custom metrics
const orderCount = new Counter('order_count');
const orderValue = new Gauge('order_value');
const cartSize = new Rate('cart_size');
const processingTime = new Trend('processing_time');
export const options = {
stages: [
{ duration: '30s', target: 50 },
{ duration: '3m', target: 50 },
{ duration: '30s', target: 0 },
],
};
export default function () {
// Simulate browsing products
const productsRes = http.get('https://api.example.com/products');
check(productsRes, { 'products loaded': (r) => r.status === 200 });
sleep(Math.random() * 2);
// Add to cart
const cartRes = http.post('https://api.example.com/cart', JSON.stringify({ productId: 1 }), {
headers: { 'Content-Type': 'application/json' },
});
check(cartRes, { 'added to cart': (r) => r.status === 201 });
// Place order
const orderRes = http.post('https://api.example.com/orders', JSON.stringify({
productId: 1,
quantity: 2,
}), {
headers: { 'Content-Type': 'application/json' },
});
if (orderRes.status === 201) {
orderCount.add(1);
orderValue.add(49.99);
cartSize.add(1);
processingTime.add(orderRes.timings.duration);
}
check(orderRes, {
'order placed': (r) => r.status === 201,
'order response < 500ms': (r) => r.timings.duration < 500,
});
sleep(1);
}Load Test Patterns
Ramp-Up Pattern
// ramp-up.js
import http from 'k6/http';
import { check, sleep } from 'k6';
export const options = {
stages: [
{ duration: '1m', target: 50 },
{ duration: '1m', target: 100 },
{ duration: '1m', target: 200 },
{ duration: '5m', target: 200 },
],
};
export default function () {
const res = http.get('https://api.example.com/products');
check(res, { 'status 200': (r) => r.status === 200 });
sleep(1);
}Steady-State Pattern
// steady-state.js
import http from 'k6/http';
import { check, sleep } from 'k6';
export const options = {
stages: [
{ duration: '5m', target: 100 },
{ duration: '30m', target: 100 }, // Long steady state
{ duration: '5m', target: 0 },
],
};
export default function () {
// Mix of read and write operations (80/20)
if (Math.random() < 0.8) {
const res = http.get('https://api.example.com/products');
check(res, { 'status 200': (r) => r.status === 200 });
} else {
const res = http.post('https://api.example.com/orders', JSON.stringify({ item: 1 }), {
headers: { 'Content-Type': 'application/json' },
});
check(res, { 'order created': (r) => r.status === 201 });
}
sleep(Math.random() * 2);
}Spike Pattern
// spike.js
import http from 'k6/http';
import { check, sleep } from 'k6';
export const options = {
stages: [
{ duration: '1m', target: 50 },
{ duration: '30s', target: 50 },
{ duration: '10s', target: 500 }, // SPIKE!
{ duration: '30s', target: 500 },
{ duration: '10s', target: 50 },
{ duration: '1m', target: 50 },
],
};
export default function () {
const res = http.get('https://api.example.com/products');
check(res, { 'status 200': (r) => r.status === 200 });
sleep(1);
}Soak Test Pattern
// soak-test.js
import http from 'k6/http';
import { check, sleep } from 'k6';
export const options = {
stages: [
{ duration: '15m', target: 100 },
{ duration: '4h', target: 100 }, // 4 hour soak test
{ duration: '15m', target: 0 },
],
thresholds: {
'http_req_duration': ['p(95)<500'],
'http_req_failed': ['rate<0.01'],
},
};
export default function () {
// Simulate realistic user journey
const loginRes = http.post('https://api.example.com/login', JSON.stringify({
username: 'user' + __VU,
password: 'password',
}), { headers: { 'Content-Type': 'application/json' } });
const token = loginRes.json('token');
const productsRes = http.get('https://api.example.com/products', {
headers: { 'Authorization': `Bearer ${token}` },
});
check(productsRes, { 'products loaded': (r) => r.status === 200 });
sleep(Math.random() * 3);
const orderRes = http.post('https://api.example.com/orders', JSON.stringify({
productId: Math.floor(Math.random() * 100),
quantity: Math.floor(Math.random() * 5) + 1,
}), {
headers: {
'Content-Type': 'application/json',
'Authorization': `Bearer ${token}`,
},
});
check(orderRes, { 'order created': (r) => r.status === 201 });
sleep(Math.random() * 5);
}Metrics Interpretation
Latency Percentiles
| Percentile | Meaning | SLA Target |
|---|---|---|
| P50 (Median) | 50% of requests faster than this | Baseline |
| P90 | 90% of requests faster than this | Acceptable |
| P95 | 95% of requests faster than this | Good |
| P99 | 99% of requests faster than this | Excellent |
// Example metrics interpretation
{
"http_req_duration": {
"med": 45.2, // 50% of requests: 45ms
"avg": 67.8, // Average: 68ms
"p(90)": 120.5, // 90% of requests: <121ms
"p(95)": 185.3, // 95% of requests: <185ms
"p(99)": 450.7, // 99% of requests: <451ms
"min": 12.1, // Fastest: 12ms
"max": 2340.5, // Slowest: 2.3s
}
}Throughput vs Error Rate
// Interpreting throughput
{
"http_reqs": {
"count": 125000, // Total requests
"rate": 104.2, // Requests per second
}
}
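// Interpreting errors
// http_req_failed is the rate of FAILED requests: "passes" counts samples where
// "failed" was true, "fails" counts requests that succeeded.
{
"http_req_failed": {
"rate": 0.008, // 0.8% of requests failed
"passes": 1000, // failed requests
"fails": 124000, // successful requests (99.2%)
}
}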
// Good metrics:
// - Throughput: stable under load
// - P99 latency: within budget
// - Error rate: < 0.1%
// - No degradation over time (soak test)JMeter — Alternative Tool
JMX Script
<?xml version="1.0" encoding="UTF-8"?>
<!-- API load test: 100 threads ramped up over 10 s, looping GET /api/products for 30 minutes. -->
<jmeterTestPlan version="1.2" properties="5.0" jmeter="5.6.2">
  <hashTree>
    <TestPlan guiclass="TestPlanGui" testclass="TestPlan" testname="API Load Test">
      <stringProp name="TestPlan.comments"></stringProp>
      <boolProp name="TestPlan.functional_mode">false</boolProp>
      <boolProp name="TestPlan.serialize_threadgroups">false</boolProp>
      <!-- Declared once; the original listed this element twice. -->
      <elementProp name="TestPlan.user_defined_variables" elementType="Arguments">
        <collectionProp name="Arguments.arguments"/>
      </elementProp>
    </TestPlan>
    <hashTree>
      <ThreadGroup guiclass="ThreadGroupGui" testclass="ThreadGroup" testname="API Users">
        <stringProp name="ThreadGroup.on_sample_error">continue</stringProp>
        <!-- loops = -1 repeats until the scheduler duration below elapses. -->
        <elementProp name="ThreadGroup.main_controller" elementType="LoopController">
          <boolProp name="LoopController.continue_forever">false</boolProp>
          <intProp name="LoopController.loops">-1</intProp>
        </elementProp>
        <stringProp name="ThreadGroup.num_threads">100</stringProp>
        <stringProp name="ThreadGroup.ramp_time">10</stringProp>
        <boolProp name="ThreadGroup.scheduler">true</boolProp>
        <stringProp name="ThreadGroup.duration">1800</stringProp>
        <stringProp name="ThreadGroup.delay">0</stringProp>
      </ThreadGroup>
      <hashTree>
        <HTTPSamplerProxy guiclass="HttpTestSampleGui" testclass="HTTPSamplerProxy" testname="Get Products">
          <elementProp name="HTTPsampler.Arguments" elementType="Arguments">
            <collectionProp name="Arguments.arguments"/>
          </elementProp>
          <stringProp name="HTTPSampler.domain">api.example.com</stringProp>
          <stringProp name="HTTPSampler.port">443</stringProp>
          <stringProp name="HTTPSampler.protocol">https</stringProp>
          <stringProp name="HTTPSampler.contentEncoding"></stringProp>
          <stringProp name="HTTPSampler.path">/api/products</stringProp>
          <stringProp name="HTTPSampler.method">GET</stringProp>
          <boolProp name="HTTPSampler.follow_redirects">true</boolProp>
          <boolProp name="HTTPSampler.auto_redirects">false</boolProp>
          <boolProp name="HTTPSampler.use_keepalive">true</boolProp>
          <boolProp name="HTTPSampler.DO_MULTIPART_POST">false</boolProp>
        </HTTPSamplerProxy>
        <hashTree/>
        <!-- Response Assertion (JMeter's built-in element for validating responses):
             the response code must equal 200 (test_type 8 = Equals). -->
        <ResponseAssertion guiclass="AssertionGui" testclass="ResponseAssertion" testname="Response Validator">
          <collectionProp name="Asserion.test_strings">
            <stringProp name="49586">200</stringProp>
          </collectionProp>
          <stringProp name="Assertion.custom_message"></stringProp>
          <stringProp name="Assertion.test_field">Assertion.response_code</stringProp>
          <boolProp name="Assertion.assume_success">false</boolProp>
          <intProp name="Assertion.test_type">8</intProp>
        </ResponseAssertion>
        <hashTree/>
      </hashTree>
    </hashTree>
  </hashTree>
</jmeterTestPlan>
Running JMeter
# Non-GUI mode (for CI/CD)
# -n: no GUI, -t: test plan, -l: results log, -e -o: generate the HTML report into ./report
jmeter -n -t api-load-test.jmx -l results.jtl -e -o ./report
# With properties
# -J defines a JMeter property; it only changes the run if the test plan reads it,
# e.g. ${__P(threads,100)} instead of a literal thread count
jmeter -n -t api-load-test.jmx -Jthreads=100 -Jduration=1800 -l results.jtlПрактика
Задание 1: k6 Script для API Load Test
Цель: Написать realistic traffic pattern для API.
Требования:
- 80/20 read/write ratio
- Authentication flow (login → token → requests)
- Ramp-up → steady-state → ramp-down
- Thresholds: P95 < 500ms, error rate < 1%
- Custom metrics: order count, processing time
Задание 2: Soak Test 4 Hours
Цель: Identify degradation patterns over time.
Требования:
- 100 VUs for 4 hours
- Monitor memory growth (dotnet-counters)
- Track P95/P99 latency over time
- Identify memory leaks (growing heap)
- Identify connection pool exhaustion
Задание 3: Performance Regression Dashboard
Цель: Historical trend tracking.
Требования:
- Run k6 on every PR
- Store results in timeseries DB (InfluxDB/Prometheus)
- Dashboard with P95/P99, throughput, error rate
- Alert on regression > 10%
Контрольные вопросы
- Как интерпретировать P95/P99 latency?
- В чём разница между ramp-up и steady-state?
- Зачем нужен soak test?
- Какие thresholds критичны для production API?
- k6 vs JMeter?
Performance Architecture
Latency Budget Allocation
End-to-End Latency Budget
Client Request (100ms budget)
├── API Gateway: 5ms (5%)
├── Auth Service: 10ms (10%)
├── Order Service: 20ms (20%)
│ ├── Validate: 5ms
│ ├── Calculate: 10ms
│ └── Persist: 5ms
├── Payment Service: 30ms (30%)
│ ├── Gateway: 20ms
│ └── Confirmation: 10ms
├── Notification: 10ms (10%)
├── Cache Layer: 5ms (5%)
├── Database: 10ms (10%)
├── Network Overhead: 5ms (5%)
└── Reserve (headroom): 5ms (5%)
─────────────────────────────────
Total: 100ms
Budget Decomposition per Service
// Latency budget enforcement middleware
/// <summary>
/// Measures every request against the latency budget configured for its path and
/// reports requests that exceed it.
/// </summary>
public class LatencyBudgetMiddleware
{
    private readonly RequestDelegate _next;
    private readonly ILogger<LatencyBudgetMiddleware> _logger;
    private readonly LatencyBudgetConfig _config;

    public LatencyBudgetMiddleware(
        RequestDelegate next,
        ILogger<LatencyBudgetMiddleware> logger,
        IOptions<LatencyBudgetConfig> config)
    {
        _next = next;
        _logger = logger;
        _config = config.Value;
    }

    /// <summary>Runs the rest of the pipeline and checks the elapsed time against the budget.</summary>
    /// <param name="context">Current request context.</param>
    public async Task InvokeAsync(HttpContext context)
    {
        // The request path lives on HttpContext.Request; HttpContext has no Path member.
        var path = context.Request.Path;
        var sw = Stopwatch.StartNew();
        var budget = _config.GetBudget(path);
        try
        {
            await _next(context);
        }
        finally
        {
            sw.Stop();
            var elapsedMs = sw.ElapsedMilliseconds;
            if (elapsedMs > budget)
            {
                _logger.LogWarning(
                    "Budget exceeded: {Path} took {Elapsed}ms (budget: {Budget}ms)",
                    path, elapsedMs, budget);

                // Track for alerting. This runs inside finally: an exception thrown here
                // would replace the request's own exception, so it is logged instead.
                try
                {
                    await TrackBudgetViolationAsync(path, elapsedMs, budget);
                }
                catch (Exception ex)
                {
                    _logger.LogError(ex, "Failed to record latency budget violation for {Path}", path);
                }
            }
        }
    }

    // Sends one violation record to telemetry.
    // NOTE(review): _telemetryService is not declared in this snippet; it presumably
    // has to be injected through the constructor — confirm against the full sample.
    private async Task TrackBudgetViolationAsync(PathString path, long elapsed, long budget)
    {
        await _telemetryService.TrackAsync(new BudgetViolationEvent
        {
            Path = path.ToString(),
            ElapsedMs = elapsed,
            BudgetMs = budget,
            Timestamp = DateTime.UtcNow
        });
    }
}
/// <summary>
/// Latency budgets in milliseconds, keyed by path prefix.
/// </summary>
public class LatencyBudgetConfig
{
    // Budget applied when no configured prefix matches the request path.
    private const long DefaultBudgetMs = 100;

    public Dictionary<string, long> Endpoints { get; set; } = new()
    {
        ["/api/orders"] = 50, // 50ms budget
        ["/api/products"] = 30, // 30ms budget
        ["/api/payments"] = 100, // 100ms budget
        ["/api/search"] = 200, // 200ms budget
    };

    /// <summary>Returns the budget of the first configured prefix that <paramref name="path"/> starts with.</summary>
    /// <param name="path">Request path to look up.</param>
    /// <returns>The matching budget, or the default budget when nothing matches.</returns>
    public long GetBudget(PathString path)
    {
        foreach (KeyValuePair<string, long> entry in Endpoints)
        {
            if (!path.StartsWithSegments(entry.Key))
            {
                continue;
            }

            return entry.Value;
        }

        return DefaultBudgetMs; // Default budget
    }
}
Throughput vs Latency Trade-offs
Queueing Theory Basics
Little's Law: L = λ × W
L = average number of items in system
λ = arrival rate (requests/sec)
W = average time in system (latency, including time spent queued)
Example:
If λ = 100 req/s and W = 50ms
Then L = 100 × 0.05 = 5 requests in flight
Throughput formula (c = number of requests processed concurrently):
Max throughput = c / ServiceTime
Latency = ServiceTime + QueueWait
As queue fills (λ approaches max throughput):
- Latency increases (wait time grows)
- Throughput stops growing (it saturates at the service's capacity)
Optimizing for Throughput
// High-throughput configuration
builder.WebHost.ConfigureKestrel(options =>
{
    options.Limits.MaxConcurrentConnections = 100_000;
    // Kestrel exposes no ThreadCount option (that setting belonged to the removed libuv
    // transport); request work runs on the .NET thread pool, which sizes itself.
});

// DbContext pooling: reuses DbContext instances between requests. Database
// connections are pooled separately by the ADO.NET provider.
builder.Services.AddDbContextPool<ApplicationDb>(options =>
    options.UseSqlServer(connectionString, sql =>
        sql.EnableRetryOnFailure(
            maxRetryCount: 3,
            maxRetryDelay: TimeSpan.FromSeconds(5),
            errorNumbersToAdd: null))); // null: retry only the provider's default transient errors
// Parallel processing
/// <summary>Processes the orders concurrently with a bounded degree of parallelism.</summary>
/// <param name="orders">Orders to process; enumerated exactly once.</param>
/// <returns>The processed orders, in their original order.</returns>
public async Task<List<Order>> ProcessOrdersAsync(IEnumerable<Order> orders)
{
    // Materialize once so the same instances can be returned after processing.
    // NOTE(review): assumes ProcessOrderAsync updates the order in place — confirm.
    var processed = new List<Order>(orders);

    // Process orders in parallel (bounded)
    var parallelOptions = new ParallelOptions
    {
        MaxDegreeOfParallelism = Environment.ProcessorCount * 4
    };

    await Parallel.ForEachAsync(processed, parallelOptions, async (order, ct) =>
    {
        await ProcessOrderAsync(order, ct);
    });

    return processed;
}
// Batch processing
/// <summary>Processes the items sequentially in batches of <paramref name="batchSize"/>.</summary>
/// <param name="items">Items to process.</param>
/// <param name="batchSize">Maximum number of items per batch; must be at least 1.</param>
/// <returns>The results of all batches, concatenated in batch order.</returns>
public async Task<List<Result>> ProcessBatchAsync(IEnumerable<Item> items, int batchSize = 100)
{
    var results = new List<Result>();

    // Enumerable.Chunk is the standard-library batching operator.
    foreach (Item[] batch in items.Chunk(batchSize))
    {
        // Must not call ProcessBatchAsync here: the original did, which re-batched the
        // same items and recursed without end.
        var batchResult = await ProcessSingleBatchAsync(batch);
        results.AddRange(batchResult);
    }

    return results;
}

// Handles one batch in a single round trip.
// NOTE(review): placeholder — the per-batch operation is not shown in this snippet.
private Task<IReadOnlyList<Result>> ProcessSingleBatchAsync(IReadOnlyList<Item> batch) =>
    throw new NotImplementedException("Provide the per-batch operation (e.g. one bulk call).");
Optimizing for Latency
// Low-latency configuration
/// <summary>
/// Serves results from the memory cache and collapses concurrent misses for the same
/// key into a single backend fetch.
/// </summary>
public class LowLatencyService
{
    private readonly IMemoryCache _cache;

    // In-flight fetches by key. Lazy guarantees that the fetch starts at most once per
    // entry even when several callers race into GetOrAdd.
    private readonly ConcurrentDictionary<string, Lazy<Task<Result>>> _prefetchCache;

    public LowLatencyService(IMemoryCache cache)
    {
        _cache = cache;
        _prefetchCache = new ConcurrentDictionary<string, Lazy<Task<Result>>>();
    }

    /// <summary>Returns the cached value for <paramref name="key"/>, fetching it on a miss.</summary>
    /// <param name="key">Cache key.</param>
    /// <returns>The cached or freshly fetched result.</returns>
    public async Task<Result> GetResultAsync(string key)
    {
        // Check memory cache first
        if (_cache.TryGetValue(key, out Result? cached) && cached is not null)
        {
            return cached;
        }

        // Join the in-flight fetch for this key or start one, atomically. A separate
        // "check, then store" sequence lets two callers both miss and both fetch.
        var entry = _prefetchCache.GetOrAdd(
            key,
            k => new Lazy<Task<Result>>(() => FetchAndCacheAsync(k)));
        try
        {
            return await entry.Value;
        }
        finally
        {
            // Remove only this entry: a newer fetch may already be registered under the key.
            _prefetchCache.TryRemove(new KeyValuePair<string, Lazy<Task<Result>>>(key, entry));
        }
    }

    // Loads the value from the backing store and caches it for 30 seconds.
    // NOTE(review): _database is not declared in this snippet — confirm how it is injected.
    private async Task<Result> FetchAndCacheAsync(string key)
    {
        var result = await _database.GetAsync(key);
        _cache.Set(key, result, TimeSpan.FromSeconds(30));
        return result;
    }
}
Connection Pooling Optimization
Database Connection Pooling
// Optimized connection string
// Server, database and credentials come from configuration (user secrets, environment
// variables, a vault) so that no password is committed to source control; only the
// pooling-related settings are fixed here.
var pooledConnectionString = new Microsoft.Data.SqlClient.SqlConnectionStringBuilder(
    builder.Configuration.GetConnectionString("ApplicationDb"))
{
    MaxPoolSize = 100,          // upper bound of pooled connections
    MinPoolSize = 10,           // connections kept warm
    ConnectTimeout = 30,        // seconds
    PacketSize = 8192,          // bytes
    ApplicationName = "MyApp",
    Enlist = false,             // do not auto-enlist in ambient transactions
}.ConnectionString;

builder.Services.AddDbContext<ApplicationDb>(options =>
    options.UseSqlServer(pooledConnectionString));
// Connection pool monitoring
/// <summary>
/// Placeholder for connection-pool reporting; the pool figures themselves are read
/// out-of-process with dotnet-counters.
/// </summary>
public class ConnectionPoolMonitor
{
    private readonly ILogger<ConnectionPoolMonitor> _logger;

    public ConnectionPoolMonitor(ILogger<ConnectionPoolMonitor> logger)
    {
        _logger = logger;
    }

    /// <summary>Intentionally empty: documents where the pool statistics come from.</summary>
    public void ReportPoolStatus()
    {
        // Monitor via dotnet-counters:
        //   dotnet-counters monitor -p <pid> Microsoft.Data.SqlClient.EventSource
        // Counters published by that event source include:
        //   active-hard-connections          (physical connections currently open)
        //   number-of-pooled-connections
        //   number-of-non-pooled-connections
        //   number-of-active-connection-pools
        //   number-of-free-connections
    }
}
HTTP Connection Pooling
// HttpClient with connection pooling
builder.Services.AddHttpClient<MyApiService>(client =>
{
    client.BaseAddress = new Uri("https://api.example.com/");
    client.DefaultRequestHeaders.Add("Accept", "application/json");
})
// SocketsHttpHandler is the handler that owns the connection pool on .NET Core / .NET 5+.
.ConfigurePrimaryHttpMessageHandler(() => new SocketsHttpHandler
{
    MaxConnectionsPerServer = 100,
    UseCookies = false,
    AllowAutoRedirect = false,
})
.ConfigureHttpClient((provider, client) =>
{
    client.Timeout = TimeSpan.FromSeconds(30);
    client.DefaultRequestHeaders.Add("User-Agent", "MyApp/1.0");
})
.SetHandlerLifetime(TimeSpan.FromHours(2)); // Recycle handlers
// Connections are pooled per (scheme, host, port).
// MaxConnectionsPerServer is unlimited by default on .NET Core / .NET 5+ (the limit of 2
// applied to .NET Framework); setting it caps concurrency towards one server.
Redis Connection Pooling
// StackExchange.Redis configuration
// StackExchange.Redis multiplexes all commands over one shared connection per server,
// so there is no connection-pool size to tune.
builder.Services.AddStackExchangeRedisCache(options =>
{
    // Typed settings go into ConfigurationOptions; `Configuration` is the string form.
    options.ConfigurationOptions = new ConfigurationOptions
    {
        EndPoints = { "redis:6379" },
        DefaultDatabase = 0,
        ConnectRetry = 3,
        ConnectTimeout = 5000,
        SyncTimeout = 5000,
        AsyncTimeout = 5000,
        KeepAlive = 30,
        TieBreaker = "",
        AbortOnConnectFail = false,
        SocketManager = SocketManager.ThreadPool, // socket I/O on the .NET thread pool
    };
    options.InstanceName = "MyApp:";
});
CDN и Edge Caching
CDN Strategy
// Configure CDN headers
app.MapGet("/api/products/{id}", async (int id, ApplicationDb db, HttpContext httpContext) =>
{
    var product = await db.Products.FindAsync(id);
    if (product is null)
    {
        return Results.NotFound();
    }

    // IResult has no header API, so caching headers are set on the response itself.
    // Browsers and shared caches may keep the response for 5 minutes and serve a stale
    // copy for up to 60 more seconds while revalidating.
    httpContext.Response.Headers.CacheControl = "public, max-age=300, stale-while-revalidate=60";
    // NOTE(review): the original called an unspecified SetCDNHeaders(true); the targeted
    // CDN-Cache-Control header is assumed to be the intent — confirm with the CDN in use.
    httpContext.Response.Headers["CDN-Cache-Control"] = "max-age=300";

    return Results.Ok(product);
});
// CDN cache invalidation
/// <summary>Asks the CDN to purge cached copies of a URL.</summary>
public class CdnCacheInvalidator
{
    private readonly IHttpClientFactory _httpClientFactory;
    private readonly string _cdnApiKey;
    private readonly string _cdnEndpoint;

    /// <exception cref="InvalidOperationException">A required setting is missing.</exception>
    public CdnCacheInvalidator(IHttpClientFactory factory, IConfiguration config)
    {
        _httpClientFactory = factory;
        // Fail at construction with a clear message rather than later with a null header.
        _cdnApiKey = config["CdnApiKey"]
            ?? throw new InvalidOperationException("Configuration value 'CdnApiKey' is missing.");
        _cdnEndpoint = config["CdnEndpoint"]
            ?? throw new InvalidOperationException("Configuration value 'CdnEndpoint' is missing.");
    }

    /// <summary>Sends a purge request for <paramref name="url"/>.</summary>
    /// <param name="url">Absolute URL whose cached copies should be dropped.</param>
    /// <exception cref="HttpRequestException">The CDN answered with a non-success status.</exception>
    public async Task InvalidateAsync(string url)
    {
        var client = _httpClientFactory.CreateClient();

        // Serialize instead of interpolating: a quote or backslash in the URL would
        // otherwise produce invalid (or attacker-shaped) JSON.
        var payload = System.Text.Json.JsonSerializer.Serialize(new { urls = new[] { url } });

        using var request = new HttpRequestMessage(HttpMethod.Post, $"{_cdnEndpoint}/purge")
        {
            Content = new StringContent(payload, Encoding.UTF8, "application/json"),
        };
        // Per-request header: keeps the API key off the client's default headers.
        request.Headers.Add("X-API-Key", _cdnApiKey);

        using var response = await client.SendAsync(request);
        response.EnsureSuccessStatusCode(); // a failed purge must not pass silently
    }
}
Database Query Optimization
Slow Query Identification
// Enable EF Core command logging (each executed command is logged with its elapsed
// time; filter the output for slow ones)
builder.Services.AddDbContext<ApplicationDb>(options =>
{
    options.UseSqlServer(connectionString)
        .LogTo(Console.WriteLine, LogLevel.Information);

    // Parameter values and detailed errors can expose user data, so they are only
    // enabled for local development.
    if (builder.Environment.IsDevelopment())
    {
        options.EnableSensitiveDataLogging()
            .EnableDetailedErrors();
    }
});
// Slow query detection middleware
/// <summary>
/// Logs a warning for every request whose total processing time exceeds the threshold.
/// It measures the whole request, not individual database queries.
/// </summary>
public class SlowQueryMiddleware
{
    private readonly RequestDelegate _next;
    private readonly ILogger<SlowQueryMiddleware> _logger;
    private readonly TimeSpan _slowQueryThreshold = TimeSpan.FromMilliseconds(100);

    public SlowQueryMiddleware(RequestDelegate next, ILogger<SlowQueryMiddleware> logger)
    {
        _next = next;
        _logger = logger;
    }

    /// <summary>Times the rest of the pipeline and reports slow requests.</summary>
    /// <param name="context">Current request context.</param>
    public async Task InvokeAsync(HttpContext context)
    {
        var sw = Stopwatch.StartNew();
        try
        {
            await _next(context);
        }
        finally
        {
            // finally: requests that fail with an exception are measured as well.
            sw.Stop();
            if (sw.Elapsed > _slowQueryThreshold)
            {
                // The path lives on HttpContext.Request; HttpContext has no Path member.
                _logger.LogWarning(
                    "Slow query detected: {Path} took {Elapsed}ms",
                    context.Request.Path, sw.ElapsedMilliseconds);
            }
        }
    }
}
// EF Core query optimization
/// <summary>Contrasts an N+1 access pattern with single-query projections.</summary>
public class OptimizedRepository
{
    private readonly ApplicationDb _db;

    public OptimizedRepository(ApplicationDb db) => _db = db;

    /// <summary>BAD: loads the products, then touches a navigation property per product.</summary>
    public async Task<List<ProductDto>> GetAllBadAsync()
    {
        var products = await _db.Products.ToListAsync();
        return products.Select(p => new ProductDto
        {
            Name = p.Name,
            // N+1 query! With lazy loading enabled this issues one query per product;
            // without it Category was never loaded and this line throws.
            CategoryName = p.Category.Name
        }).ToList();
    }

    /// <summary>GOOD: one query; the projection is translated into a SQL JOIN.</summary>
    public async Task<List<ProductDto>> GetAllGoodAsync()
    {
        // No Include needed: EF Core ignores Include when the query ends in a projection
        // and joins Category only because the projection reads it.
        return await _db.Products
            .Select(p => new ProductDto
            {
                Name = p.Name,
                CategoryName = p.Category!.Name
            })
            .ToListAsync();
    }

    /// <summary>GOOD: projection that selects only the required columns of active products.</summary>
    public async Task<List<ProductDto>> GetAllProjectedAsync()
    {
        return await _db.Products
            .AsNoTracking()
            .Where(p => p.IsActive)
            .Select(p => new ProductDto
            {
                Id = p.Id,
                Name = p.Name,
                Price = p.Price
            })
            .ToListAsync();
    }
}
Практика
Задание 1: Latency Budget для Multi-Service Chain
Цель: Спроектировать и реализовать latency budget.
Шаги:
- Определить end-to-end budget (например, 200ms)
- Decompose per service (API Gateway, Auth, Order, Payment)
- Реализовать middleware для tracking
- Настроить alerting при violation
- Document findings
Задание 2: Connection Pool Optimization
Цель: Оптимизировать connection pools через profiling data.
Шаги:
- Запустить приложение под нагрузкой
- Проанализировать connection pool stats (dotnet-counters)
- Найти bottleneck: DB, Redis, HTTP
- Оптимизировать pool size, timeout, lifetime
- Измерить improvement
Задание 3: Performance SLA Monitoring
Цель: Создать automatic alerting.
Шаги:
- Определить SLA: P99 < 500ms, error rate < 0.1%
- Настроить OpenTelemetry для tracing
- Интегрировать с Prometheus/Grafana
- Настроить alerts при violation
- Создать runbook для incidents
Контрольные вопросы
- Что такое latency budget и как его allocate?
- Как throughput влияет на latency?
- Как оптимизировать connection pooling?
- Когда использовать CDN?
- Как identify slow queries?
Advanced JIT и Runtime Optimization
Tiered Compilation
Что такое Tiered Compilation
Tiered Compilation — JIT компилирует методы в несколько этапов:
- Tier 0 (Quick JIT): быстрая компиляция с минимальными оптимизациями для быстрого старта
- Tier 1 (Optimized): полная оптимизация «горячих» методов в фоновом потоке
В .NET нет интерпретатора по умолчанию: уже первый вызов метода выполняет код, скомпилированный JIT.
Method Call Timeline:
┌─────────────────────────────────────────────────┐
│ Call 1: Tier 0 (fast, minimal JIT) │
│ Calls 2-29: Tier 0 code runs, calls are counted │
│ Call 30+: queued for Tier 1 (optimized JIT) │
│ ↑ background re-JIT after a ~100ms delay │
└─────────────────────────────────────────────────┘
Tiered Compilation Configuration
// Enable/Disable tiered compilation
// runtimeconfig.template.json (merged into <app>.runtimeconfig.json at build time).
// JIT settings are runtime configuration knobs, not appsettings.json entries.
{
  "configProperties": {
    "System.Runtime.TieredCompilation": true,
    "System.Runtime.TieredCompilation.QuickJit": true,
    "System.Runtime.TieredCompilation.QuickJitForLoops": true,
    "System.Runtime.TieredPGO": true
  }
}
// The runtime reads these values once at startup, so calling AppContext.SetSwitch from
// Program.cs does not change how the JIT behaves. Equivalent ways to set them:
//   .csproj:      <TieredCompilation>true</TieredCompilation>
//   environment:  DOTNET_TieredCompilation=1
Tier 0 vs Tier 1 Differences
| Feature | Tier 0 | Tier 1 |
|---|---|---|
| Compilation speed | Fast | Slower |
| Code quality | Basic | Optimized |
| Inlining | Limited | Full |
| Loop optimization | Basic | Full (unrolling, vectorization) |
| Devirtualization | Limited | Full |
| Bounds check elimination | No | Yes |
| CPU performance | ~80-90% of Tier 1 | 100% |
QuickJit — Быстрый Старт
<!-- Quick JIT: Tier 0 code is compiled fast with minimal optimizations (no aggressive
     inlining); it speeds up startup, and hot methods are re-compiled at Tier 1 later.
     JIT settings are read at startup, so they belong in the project file (or in
     runtimeconfig.json / DOTNET_* environment variables), not in AppContext.SetSwitch. -->
<PropertyGroup>
  <TieredCompilationQuickJit>true</TieredCompilationQuickJit>
  <!-- Quick JIT for loops: methods containing loops also start at Tier 0 and are
       switched to optimized code while running (on-stack replacement). -->
  <TieredCompilationQuickJitForLoops>true</TieredCompilationQuickJitForLoops>
</PropertyGroup>
<!-- There is no "QuickJitForInlinees" setting in the .NET runtime. -->
Disabling Tiered Compilation
<!-- For latency-critical applications: every method is compiled fully optimized on
     first call, so there is no later background re-compilation. Startup is slower,
     steady-state performance is stable (real-time systems, trading platforms).
     Set in the project file, or start the process with DOTNET_TieredCompilation=0;
     AppContext.SetSwitch at run time has no effect on the JIT. -->
<PropertyGroup>
  <TieredCompilation>false</TieredCompilation>
</PropertyGroup>
R2R — Ready-to-Run
Что такое R2R
R2R (Ready-to-Run) — pre-JIT компиляция assemblies в native code.
Traditional JIT:
Deploy IL → Runtime JIT compile → Execute
R2R:
Deploy R2R native + IL fallback → Execute → JIT optimize (optional)Creating R2R Deployments
# Publish with R2R (compiled ahead of time by crossgen; NGen is the .NET Framework tool)
dotnet publish -c Release -r win-x64 /p:PublishReadyToRun=true
# Publish with R2R + trimming
dotnet publish -c Release -r win-x64 \
/p:PublishReadyToRun=true \
/p:PublishTrimmed=true \
/p:TrimMode=link
# R2R packaged as a single file
dotnet publish -c Release -r win-x64 \
/p:PublishReadyToRun=true \
/p:PublishSingleFile=true
R2R vs JIT vs Native AOT
| Feature | JIT | R2R | Native AOT |
|---|---|---|---|
| Startup time | Slow | Fast | Fastest |
| Peak performance | Best | Good | Good |
| App size | Small | Medium | Largest |
| Reflection | Full | Full | Limited |
| Dynamic code gen | Yes | Yes | No |
| Platform | Windows/Linux/macOS | Windows/Linux/macOS | Windows/Linux/macOS |
| Trimming | Optional (PublishTrimmed) | Optional (PublishTrimmed) | Required (always on) |
Native AOT
Native AOT Basics
# List the installed runtimes (this shows versions only; it does not by itself confirm
# that the Native AOT toolchain for the target platform is available)
dotnet --list-runtimes
# Publish Native AOT
dotnet publish -c Release -r linux-x64 /p:PublishAot=true
# Windows
dotnet publish -c Release -r win-x64 /p:PublishAot=true
# macOS ARM64
dotnet publish -c Release -r osx-arm64 /p:PublishAot=trueAOT Compatibility
// AOT-compatible code
/// <summary>Examples of constructs that work under Native AOT.</summary>
public class AotCompatibleService
{
    // Virtual calls — supported
    public virtual string Process() => "processed";

    /// <summary>Returns the default value of <paramref name="type"/>.</summary>
    /// <param name="type">
    /// Type to create a default for. The annotation tells the trimmer to keep the
    /// parameterless constructor of every type that flows into this parameter.
    /// </param>
    /// <returns>A zero-initialized boxed instance for value types; null for reference types.</returns>
    public object? GetDefaultValue(
        [System.Diagnostics.CodeAnalysis.DynamicallyAccessedMembers(
            System.Diagnostics.CodeAnalysis.DynamicallyAccessedMemberTypes.PublicParameterlessConstructor)]
        Type type)
    {
        return type.IsValueType ? Activator.CreateInstance(type) : null;
    }

    // Dynamic method generation — NOT supported
    // public DynamicMethod CreateMethod() => ...; // ❌
}
Reflection Workarounds for AOT
// Use Source Generators instead of Reflection
// MySerializerGenerator.cs (source generator)
// Generates: MySerializer_Types.g.cs
// AOT-compatible serialization
// Source-generation context: each [JsonSerializable] attribute below names one type
// (Product, Order, User and the two list types) this context is declared for.
// The options make property-name matching case-insensitive and allow numbers to be
// read from JSON strings.
[JsonSourceGenerationOptions(
PropertyNameCaseInsensitive = true,
NumberHandling = JsonNumberHandling.AllowReadingFromString)]
[JsonSerializable(typeof(Product))]
[JsonSerializable(typeof(Order))]
[JsonSerializable(typeof(User))]
[JsonSerializable(typeof(List<Product>))]
[JsonSerializable(typeof(List<Order>))]
// partial: the remainder of the class is expected from the source generator.
public partial class AppJsonSerializerContext : JsonSerializerContext { }
// Use generated context
/// <summary>Serializes products through the source-generated metadata (no reflection).</summary>
public class AotService
{
    // The generated Default instance is a shared singleton: its type-metadata cache is
    // built once instead of once per AotService instance.
    private readonly AppJsonSerializerContext _context;

    public AotService() => _context = AppJsonSerializerContext.Default;

    /// <summary>Serializes <paramref name="product"/> to JSON.</summary>
    public string Serialize(Product product)
    {
        return JsonSerializer.Serialize(product, _context.Product);
    }

    /// <summary>Parses a product from JSON.</summary>
    /// <returns>The product, or null when the JSON value is <c>null</c>.</returns>
    public Product? Deserialize(string json)
    {
        return JsonSerializer.Deserialize(json, _context.Product);
    }
}
AOT Analysis and Trimming
# Run the AOT analysis as part of publishing
dotnet publish -c Release -r win-x64 /p:PublishAot=true /p:EnableAOTAnalyzer=true
# Output: build warnings (IL2xxx for trimming, IL3xxx for AOT) on the exact call sites
# whose reflection or dynamic code may fail at run time
# Fix reflection issues
# 1. Add [DynamicDependency] attributes
# 2. Use source generators
# 3. Pre-register types

// [DynamicDependency] is placed on a constructor, method or field (not on a class) and
// tells the trimmer which members of which type must be preserved for that code.
public class MyService
{
    [System.Diagnostics.CodeAnalysis.DynamicDependency(
        System.Diagnostics.CodeAnalysis.DynamicallyAccessedMemberTypes.PublicProperties,
        typeof(Product))]
    public MyService() { }
}
Startup Time Comparison
Benchmark: JIT vs R2R vs Native AOT
using BenchmarkDotNet.Attributes;
using BenchmarkDotNet.Configs;
using BenchmarkDotNet.Engines;
using BenchmarkDotNet.Jobs;
// Benchmark class configured by AotConfig; allocations are reported through MemoryDiagnoser.
// NOTE(review): the three methods run under whatever runtime the job uses — nothing in
// the method bodies selects JIT, R2R or Native AOT, and JIT_Startup and R2R_Startup are
// identical. Presumably the comparison is meant to come from one job per runtime in the
// config; confirm before trusting the result table.
[Config(typeof(AotConfig))]
[MemoryDiagnoser]
public class StartupTimeBenchmark
{
[Benchmark(Baseline = true)]
public void JIT_Startup()
{
// JIT compilation happens on first call
var result = new OptimizedService().Process();
// Keeps the result observable so the call is not optimized away
GC.KeepAlive(result);
}
[Benchmark]
public void R2R_Startup()
{
// Pre-compiled, but IL fallback available
var result = new OptimizedService().Process();
GC.KeepAlive(result);
}
[Benchmark]
public void AOT_Startup()
{
// Fully native, no JIT
// NOTE(review): the AotService shown in the AOT section exposes Serialize/Deserialize
// but no Process() — verify which type is meant here.
var result = new AotService().Process();
GC.KeepAlive(result);
}
}
/// <summary>
/// BenchmarkDotNet configuration used by <c>StartupTimeBenchmark</c>:
/// 3 process launches, each with 10 warmup and 10 measured iterations.
/// </summary>
public class AotConfig : ManualConfig
{
    public AotConfig()
    {
        // Run each benchmark multiple times for stable results.
        // The run strategy is set with WithStrategy(); Job has no WithRun() method.
        // NOTE(review): with Monitoring + warmup iterations the first-call JIT cost is paid
        // before measurement starts; RunStrategy.ColdStart is the strategy that keeps it in.
        AddJob(Job.Default
            .WithStrategy(RunStrategy.Monitoring)
            .WithWarmupCount(10)
            .WithIterationCount(10)
            .WithLaunchCount(3));
    }
}
// Expected results:
// | Method | Mean | Allocated |
// |--------------|-----------|-----------|
// | JIT_Startup | 15.234ms | 2.1 MB |
// | R2R_Startup | 2.456ms | 0.8 MB |
// | AOT_Startup | 1.234ms | 0.5 MB |
Startup Time Optimization
// Eager JIT compilation
/// <summary>
/// Executes the critical code paths once so they are JIT-compiled before real traffic arrives.
/// </summary>
public static class StartupOptimizer
{
    /// <summary>Runs the service methods first, then touches the reflection metadata.</summary>
    public static void WarmUp()
    {
        InvokeCriticalMethods();
        TouchTypeMetadata();
    }

    // Calling the methods is what forces them through the JIT.
    private static void InvokeCriticalMethods()
    {
        var warmupInstance = new OptimizedService();
        warmupInstance.Process();
        warmupInstance.HeavyComputation();
    }

    // Results are discarded; only the side effect of loading property metadata is wanted.
    private static void TouchTypeMetadata()
    {
        _ = typeof(Product).GetProperties();
        _ = typeof(Order).GetProperties();
    }
}
// Use in Program.cs
// WarmUp() is called synchronously after the builder is created and before Build(),
// so its cost is added to the time until the app object exists.
var builder = WebApplication.CreateBuilder(args);
StartupOptimizer.WarmUp(); // Pre-JIT critical paths
var app = builder.Build();Generic Sharing
Generic Type Instantiation
// Generic repository constrained to reference types (where T : class).
// Because T is always a reference type here, this is the case the following notes describe
// as sharing one native code body across instantiations.
// The "..." bodies are placeholders in this snippet, not compilable code.
public class GenericRepository<T> where T : class
{
public T? GetById(int id) => ...;
public List<T> GetAll() => ...;
}
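// With generic sharing — one native code body serves all reference-type instantiations
// The runtime has shared generic code across reference types since generics were introduced (CLR 2.0); this is not new in .NET 5
// Value types still get a dedicated instantiation per type argument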
// Check generic sharing
// Set COMPlus_JitStress=2 to stress test generic sharing
Environment.SetEnvironmentVariable("COMPlus_JitStress", "2");Generic Sharing Benefits
// Generic sharing reduces code size
// Before: 100 generic types × 1000 lines = 100KB native code
// After: 1 shared implementation = 1000 lines native code
// Generic sharing is automatic for:
// - Reference types (class)
// - Methods without generic constraints on value types
// For value types — explicit sharing via constraints
// Repository constrained to value types (where T : struct); intentionally empty in this snippet.
public class ValueRepo<T> where T : struct
{
// Each value type gets its own instantiation
// Consider using object or interface to enable sharing
// (note: passing a struct as object or as an interface boxes it)
}Array Bounds Check Elimination
JIT Bounds Check Optimization
// The loop bound is data.Length itself, which is the shape the JIT recognises
// when it removes the per-element range check.
/// <summary>Adds up every element of <paramref name="data"/>.</summary>
public int Sum(int[] data)
{
    int total = 0;
    int index = 0;
    while (index < data.Length)
    {
        total += data[index];
        index++;
    }
    return total;
}
// Bounds check stays: the loop bound is an arbitrary caller value,
// so nothing proves that every index is inside the array.
/// <summary>Adds up the first <paramref name="limit"/> elements of <paramref name="data"/>.</summary>
public int SumConditional(int[] data, int limit)
{
    int total = 0;
    int index = 0;
    while (index < limit)
    {
        total += data[index];
        index++;
    }
    return total;
}
// Fix: validate the bound once by slicing, then iterate the slice.
// Debug.Assert cannot do this job: it is compiled out of Release builds and the JIT
// does not treat it as a hint. A span of exactly `limit` elements carries its own
// length, so the loop below is the "index < Length" shape again.
/// <summary>Adds up the first <paramref name="limit"/> elements of <paramref name="data"/>.</summary>
/// <param name="data">Source array.</param>
/// <param name="limit">Number of leading elements to add; zero or negative yields 0.</param>
/// <returns>The sum of the first <paramref name="limit"/> elements.</returns>
/// <exception cref="ArgumentOutOfRangeException">
/// <paramref name="limit"/> is greater than <c>data.Length</c> (raised up front by the slice,
/// instead of an IndexOutOfRangeException part-way through the loop).
/// </exception>
public int SumFixed(int[] data, int limit)
{
    // Preserve the original behaviour for non-positive limits: the loop never ran.
    if (limit <= 0)
        return 0;

    int sum = 0;
    foreach (int value in data.AsSpan(0, limit))
        sum += value;
    return sum;
}
Span Bounds Check Elimination
// Span<T> — the indexer is range-checked (safe).
// NOTE(review): because the loop bound is data.Length, the JIT can normally elide the
// check here just as it does for arrays — confirm against the disassembly.
/// <summary>Adds up every element of <paramref name="data"/>.</summary>
public int SumSpan(Span<int> data)
{
    int total = 0;
    int index = 0;
    while (index < data.Length)
    {
        total += data[index];
        index++;
    }
    return total;
}
// Pinned-pointer version — unsafe, no range checks at all.
// (The span is pinned with `fixed`; MemoryMarshal is not involved.)
/// <summary>Adds up every element of <paramref name="data"/> through a raw pointer.</summary>
public unsafe int SumUnsafe(Span<int> data)
{
    int total = 0;
    fixed (int* start = data)
    {
        int* end = start + data.Length;
        for (int* cursor = start; cursor < end; cursor++)
            total += *cursor; // raw read, nothing validates the address
    }
    return total;
}
String Intern Pool
String Interning
// String interning — identical string values can share one object
public class StringInterningService
{
    /// <summary>
    /// Walks through literal interning, a runtime-built string, manual interning and
    /// <c>string.IsInterned</c>. The comparison results are discarded; the trailing
    /// comments state what each comparison evaluates to.
    /// </summary>
    public void Demo()
    {
        // Literals with the same text resolve to the same interned object
        var literalA = "hello";
        var literalB = "hello";
        _ = ReferenceEquals(literalA, literalB); // true — same object!

        // A string assembled at run time is a separate object
        var built = new string(new[] { 'h', 'e', 'l', 'l', 'o' });
        _ = ReferenceEquals(literalA, built); // false — different objects

        // Intern returns the pooled instance for that text
        var pooled = string.Intern(built);
        _ = ReferenceEquals(literalA, pooled); // true — interned!

        // IsInterned returns the pooled instance, or null when the text is not in the pool
        var lookup = string.IsInterned("hello");
        var s5 = lookup ?? "not found";
    }
}
// String interning trade-offs:
// Pros: Less memory for repeated strings
// Cons: Intern pool never shrinks, memory leak risk
String Optimization
// Prefer compile-time constants (auto-interned)
const string ContentType = "application/json";
// Use string.Create for dynamic strings: the result string is the only allocation,
// there are no intermediate strings or builders.
/// <summary>Builds the message <c>"{name} {count}!"</c> directly inside the new string's buffer.</summary>
/// <param name="name">Text placed at the start of the message; <c>null</c> is treated as empty.</param>
/// <param name="count">Number written after the name, formatted with the invariant culture.</param>
/// <returns>The formatted message, sized exactly (no padding characters).</returns>
public static string FormatMessage(string name, int count)
{
    name ??= string.Empty;

    // Measure the digits first: string.Create needs the exact final length, and a fixed
    // guess (such as 32) would leave '\0' characters in the unused part of the string.
    // 11 chars is enough for int.MinValue ("-2147483648").
    Span<char> scratch = stackalloc char[11];
    count.TryFormat(scratch, out int digitCount, default, System.Globalization.CultureInfo.InvariantCulture);

    int length = name.Length + 1 + digitCount + 1; // name + ' ' + digits + '!'
    return string.Create(length, (name, count), static (span, args) =>
    {
        args.name.AsSpan().CopyTo(span);
        span[args.name.Length] = ' ';
        // int has no ToString(Span<char>) overload; TryFormat writes into the span.
        args.count.TryFormat(
            span[(args.name.Length + 1)..^1],
            out _,
            default,
            System.Globalization.CultureInfo.InvariantCulture);
        span[^1] = '!';
    });
}
// Span-based scan: reads the characters in place, nothing is allocated
/// <summary>Reports whether <paramref name="input"/> contains an ASCII upper-case letter (A–Z).</summary>
public bool ContainsUppercase(ReadOnlySpan<char> input)
{
    for (int position = 0; position < input.Length; position++)
    {
        char current = input[position];
        bool isAsciiUpper = current is >= 'A' and <= 'Z';
        if (isAsciiUpper)
            return true;
    }
    return false;
}
Практика
Задание 1: JIT vs R2R vs Native AOT Comparison
Цель: Сравнить startup time для same application.
Шаги:
- Создать ASP.NET Core API
- Publish: JIT, R2R, Native AOT
- Benchmark startup time (cold start)
- Benchmark peak performance
- Compare app size
- Document trade-offs
Задание 2: Generic-Heavy Codebase Optimization
Цель: Оптимизировать через understanding of generic sharing.
Шаги:
- Создать generic repository pattern
- Benchmark с 10+ generic types
- Измерить impact на code size и memory
- Оптимизировать: generic sharing, constraints
- Измерить improvement
Задание 3: Trimmed Native AOT Deployment
Цель: Создать trimmed AOT deployment с working reflection.
Шаги:
- Publish AOT → получить aot-analysis.json
- Fix reflection issues: [DynamicDependency], source generators
- Publish trimmed AOT
- Test all functionality
- Validate no runtime reflection errors
Контрольные вопросы
- Что такое Tiered Compilation?
- В чём разница между R2R и Native AOT?
- Как Native AOT влияет на reflection?
- Что такое generic sharing?
- Как JIT eliminates bounds checks?
Performance Culture и Process
Performance Budget
Defining Performance Budgets
// Performance budget expressed as code
/// <summary>
/// Holds per-endpoint budgets keyed by request path plus process-wide limits.
/// The constructor seeds budgets for <c>/api/products</c> and <c>/api/orders</c>.
/// </summary>
public class PerformanceBudget
{
    public Dictionary<string, EndpointBudget> Endpoints { get; } = new();
    public GlobalBudget Global { get; } = new();

    public PerformanceBudget()
    {
        // API endpoint budgets
        Endpoints["/api/products"] = CreateEndpointBudget(p99Ms: 200, p95Ms: 100, maxAllocationKb: 100, maxGen0: 5);
        Endpoints["/api/orders"] = CreateEndpointBudget(p99Ms: 500, p95Ms: 250, maxAllocationKb: 500, maxGen0: 10);

        // Global budgets
        Global.MaxThreadCount = 200;
        Global.MaxMemoryPercent = 75;
        Global.MaxCpuPercent = 80;
    }

    // Builds one endpoint budget; the allocation limit is given in KB and stored in bytes.
    private static EndpointBudget CreateEndpointBudget(double p99Ms, double p95Ms, int maxAllocationKb, int maxGen0)
    {
        var budget = new EndpointBudget();
        budget.P99Latency = TimeSpan.FromMilliseconds(p99Ms);
        budget.P95Latency = TimeSpan.FromMilliseconds(p95Ms);
        budget.MaxAllocations = 1024 * maxAllocationKb;
        budget.MaxGen0Collections = maxGen0;
        return budget;
    }
}
// Limits for a single endpoint. In this document PerformanceBudgetMiddleware compares
// P99Latency, MaxAllocations and MaxGen0Collections against per-request measurements;
// P95Latency, P50Latency and MaxErrorRate are not read by any code shown here.
public class EndpointBudget
{
public TimeSpan P99Latency { get; set; }
public TimeSpan P95Latency { get; set; }
public TimeSpan P50Latency { get; set; }
// Bytes (the seeded values are written as 1024 * N with a KB comment)
public long MaxAllocations { get; set; }
public int MaxGen0Collections { get; set; }
// Fraction, not percent: 0.01 means 1%
public double MaxErrorRate { get; set; } = 0.01; // 1%
}
// Process-wide limits. PerformanceBudget's constructor sets the CPU, memory and thread
// values; MaxQueueLength is left at its default of 0 there.
public class GlobalBudget
{
public double MaxCpuPercent { get; set; }
public double MaxMemoryPercent { get; set; }
public int MaxThreadCount { get; set; }
public int MaxQueueLength { get; set; }
}Budget Enforcement Middleware
/// <summary>
/// ASP.NET Core middleware that measures each request and reports it when it exceeds the
/// budget registered for its path in <see cref="PerformanceBudget.Endpoints"/>.
/// </summary>
/// <remarks>
/// The allocation and Gen0 figures are process-wide deltas taken around the request, so
/// concurrent requests inflate each other's numbers. Paths without a registered budget are
/// measured but never reported.
/// </remarks>
public class PerformanceBudgetMiddleware
{
    private readonly RequestDelegate _next;
    private readonly PerformanceBudget _budget;
    private readonly ILogger<PerformanceBudgetMiddleware> _logger;
    private readonly ITelemetryService _telemetry;

    public PerformanceBudgetMiddleware(
        RequestDelegate next,
        IOptions<PerformanceBudget> budget,
        ILogger<PerformanceBudgetMiddleware> logger,
        ITelemetryService telemetry)
    {
        _next = next;
        _budget = budget.Value;
        _logger = logger;
        _telemetry = telemetry;
    }

    /// <summary>Runs the rest of the pipeline, then checks the measurements against the budget.</summary>
    /// <param name="context">The current HTTP context.</param>
    public async Task InvokeAsync(HttpContext context)
    {
        // HttpContext has no Path property; the request path lives on context.Request.
        var path = context.Request.Path.ToString();

        var gen0Before = GC.CollectionCount(0);
        // GetTotalAllocatedBytes only ever grows, so the delta cannot go negative the way a
        // GetTotalMemory (live heap size) delta does when a GC runs during the request.
        var allocatedBefore = GC.GetTotalAllocatedBytes(false);
        var sw = Stopwatch.StartNew();

        await _next(context);

        sw.Stop();
        var elapsed = sw.Elapsed;
        var allocations = GC.GetTotalAllocatedBytes(false) - allocatedBefore;
        var gen0Collections = GC.CollectionCount(0) - gen0Before;

        // Check budget
        if (!_budget.Endpoints.TryGetValue(path, out var budget) || budget is null)
            return;

        var violations = FindViolations(budget, elapsed, allocations, gen0Collections);
        if (violations.Count == 0)
            return;

        _logger.LogWarning(
            "Performance budget violated for {Path}: {Violations}",
            path, string.Join(", ", violations));

        await _telemetry.TrackBudgetViolationAsync(new BudgetViolation
        {
            Path = path,
            ElapsedMs = elapsed.TotalMilliseconds,
            Allocations = allocations,
            Gen0Collections = gen0Collections,
            Violations = violations,
            Timestamp = DateTime.UtcNow
        });
    }

    // Compares one request's measurements with its budget and describes every limit exceeded.
    private static List<string> FindViolations(
        EndpointBudget budget, TimeSpan elapsed, long allocations, int gen0Collections)
    {
        var violations = new List<string>();

        // A single request is compared with the P99 limit; this is a per-request ceiling,
        // not a percentile computed over many requests.
        if (elapsed > budget.P99Latency)
            violations.Add($"P99 latency: {elapsed.TotalMilliseconds:F0}ms > {budget.P99Latency.TotalMilliseconds:F0}ms");
        if (allocations > budget.MaxAllocations)
            violations.Add($"Allocations: {allocations / 1024}KB > {budget.MaxAllocations / 1024}KB");
        if (gen0Collections > budget.MaxGen0Collections)
            violations.Add($"Gen0 collections: {gen0Collections} > {budget.MaxGen0Collections}");

        return violations;
    }
}
Continuous Performance Testing
CI Integration
# .github/workflows/performance.yml
# Runs the "Critical" benchmark category on every PR to main and fails the job when any
# benchmark's mean is more than 10% slower than the stored baseline.
name: Performance Gate
on:
  pull_request:
    branches: [ main ]
    paths:
      - 'src/**'
      - 'benchmarks/**'
jobs:
  performance-gate:
    runs-on: windows-latest
    steps:
      - uses: actions/checkout@v4
      - name: Setup .NET
        uses: actions/setup-dotnet@v4
        with:
          dotnet-version: '9.0.x'
      - name: Restore
        run: dotnet restore
      - name: Build
        run: dotnet build -c Release --no-restore
      - name: Run benchmarks
        working-directory: benchmarks
        # Everything after "--" is passed to the benchmark app instead of to `dotnet run`.
        # --anyCategories / --exporters are BenchmarkDotNet console arguments; this assumes
        # Program.cs forwards args to BenchmarkSwitcher/BenchmarkRunner (TODO confirm).
        run: dotnet run -c Release -- --anyCategories Critical --exporters json
      - name: Check performance regression
        # Each step starts in the workspace root, so the directory is set again here.
        working-directory: benchmarks
        run: |
          $baseline = Get-Content "./Results/baseline.json" -Raw | ConvertFrom-Json
          $regression = $false
          # The JSON exporter writes one *-report-full.json per benchmark class.
          foreach ($file in Get-ChildItem "./BenchmarkDotNet.Artifacts/results/*-report-full.json") {
            $current = Get-Content $file.FullName -Raw | ConvertFrom-Json
            foreach ($benchmark in $current.Benchmarks) {
              $baselineBench = $baseline.Benchmarks |
                Where-Object { $_.FullName -eq $benchmark.FullName } |
                Select-Object -First 1
              if ($baselineBench) {
                $ratio = $benchmark.Statistics.Mean / $baselineBench.Statistics.Mean
                if ($ratio -gt 1.1) {
                  # Write-Host + workflow command: reports every regression instead of
                  # stopping at the first one (Write-Error terminates the step).
                  Write-Host "::error::Regression: $($benchmark.FullName) is $(($ratio - 1).ToString('P0')) slower than baseline"
                  $regression = $true
                }
              }
            }
          }
          if ($regression) { exit 1 }
      - name: Upload results
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: benchmark-results
          path: benchmarks/BenchmarkDotNet.Artifacts/results/
xUnit Performance Tests
using BenchmarkDotNet.Attributes;
using BenchmarkDotNet.Running;
using Xunit;
using Xunit.Abstractions;
/// <summary>
/// xUnit tests that run a BenchmarkDotNet benchmark and fail when its mean time (or
/// allocation) exceeds a fixed budget.
/// </summary>
/// <remarks>
/// Each test evaluates the first report of the summary only. BenchmarkDotNet reports
/// statistics in nanoseconds, so means are converted to microseconds before comparison.
/// </remarks>
public class PerformanceTests
{
    private readonly ITestOutputHelper _output;

    public PerformanceTests(ITestOutputHelper output)
    {
        _output = output;
    }

    [Fact]
    public void StringJoin_ShouldNotExceedBudget()
    {
        var report = RunFirstReport<StringJoinBenchmark>();
        var mean = MeanMicroseconds(report);
        _output.WriteLine($"StringJoin Mean: {mean:F3} μs");
        // Performance budget: < 50μs
        Assert.True(mean < 50, $"StringJoin exceeded budget: {mean:F3} μs");
    }

    [Fact]
    public void DatabaseQuery_ShouldNotExceedBudget()
    {
        var report = RunFirstReport<DatabaseQueryBenchmark>();
        var mean = MeanMicroseconds(report);
        _output.WriteLine($"DB Query Mean: {mean:F3} μs");
        // Performance budget: < 5ms
        Assert.True(mean < 5_000, $"DB Query exceeded budget: {mean:F3} μs");
    }

    [Fact]
    public void Serialization_ShouldNotExceedBudget()
    {
        var report = RunFirstReport<SerializationBenchmark>();
        var mean = MeanMicroseconds(report);
        // Bytes per operation; null when no memory diagnoser data was collected.
        var allocated = (report.GcStats.GetBytesAllocatedPerOperation(report.BenchmarkCase) ?? 0) / 1024; // KB
        _output.WriteLine($"Serialization: {mean:F3} μs, {allocated} KB");
        // Performance budget: < 100μs, < 1KB allocations
        Assert.True(mean < 100, $"Serialization latency exceeded: {mean:F3} μs");
        Assert.True(allocated < 1, $"Serialization allocations exceeded: {allocated} KB");
    }

    // Runs the benchmark class and returns its first report, failing the test when the
    // run produced no report or no statistics (for example, the benchmark threw).
    private static BenchmarkDotNet.Reports.BenchmarkReport RunFirstReport<TBenchmark>()
    {
        var summary = BenchmarkRunner.Run<TBenchmark>();
        var report = summary.Reports.FirstOrDefault();
        Assert.True(report is not null, "Benchmark run produced no reports");
        Assert.True(report!.ResultStatistics is not null, "Benchmark run produced no result statistics");
        return report;
    }

    // Statistics.Mean is expressed in nanoseconds.
    private static double MeanMicroseconds(BenchmarkDotNet.Reports.BenchmarkReport report)
        => report.ResultStatistics!.Mean / 1_000.0;
}
Performance Review Process
Architecture Decision Impact Analysis
// Checklist of performance concerns per kind of architecture decision
public class PerformanceImpactChecklist
{
    /// <summary>Returns the performance concerns to review for a named architecture decision.</summary>
    /// <param name="decision">Decision name; matched exactly and case-sensitively.</param>
    /// <param name="context">Additional decision data; not consulted by this method.</param>
    /// <returns>A new list of concerns, empty when the decision name is not recognised.</returns>
    public static List<string> CheckArchitectureDecision(string decision, IDictionary<string, object> context)
    {
        return decision switch
        {
            "Sync-over-async" => new List<string>
            {
                "Thread pool starvation risk — use fully async",
                "Monitor thread pool queue length",
            },
            "Large object allocation" => new List<string>
            {
                "LOH fragmentation risk — use object pooling",
                "Monitor LOH size via dotnet-counters",
            },
            "Static caching" => new List<string>
            {
                "Memory leak risk — implement eviction",
                "Monitor cache size and eviction rate",
            },
            "Event-driven architecture" => new List<string>
            {
                "Message ordering guarantees",
                "Backpressure handling",
                "Dead letter queue strategy",
            },
            "Database sharding" => new List<string>
            {
                "Cross-shard query performance",
                "Shard key selection impact on query patterns",
                "Rebalancing cost",
            },
            _ => new List<string>(),
        };
    }
}
// How the checklist is used during an architecture review
public class ArchitectureReview
{
    /// <summary>Writes every checklist concern for <paramref name="decision"/> to the console.</summary>
    public void Review(ArchitectureDecision decision)
    {
        List<string> findings = PerformanceImpactChecklist.CheckArchitectureDecision(
            decision.Type,
            decision.Context);

        findings.ForEach(finding => Console.WriteLine($"⚠️ {finding}"));
    }
}
Capacity Planning
Growth Projection Model
/// <summary>
/// Projects RPS, CPU and memory month by month under compound growth and lists the months
/// in which a scaling threshold is crossed.
/// </summary>
public class CapacityPlanningModel
{
    // Thresholds shared by the trigger flag and the recommendations.
    private const double CpuScalingThresholdPercent = 80;
    private const double MemoryScalingThresholdPercent = 85;

    public double CurrentRps { get; set; }
    public double CurrentLatencyP99 { get; set; }
    public double CurrentCpuPercent { get; set; }
    public double CurrentMemoryPercent { get; set; }
    public double GrowthRatePerMonth { get; set; } // e.g., 0.1 = 10% per month
    public int MonthsToPlan { get; set; } = 12;

    /// <summary>Builds the monthly projections and the matching recommendations.</summary>
    /// <returns>
    /// One projection per month from 1 to <see cref="MonthsToPlan"/> (none when it is zero or
    /// negative), plus a recommendation for every threshold crossed in a month.
    /// </returns>
    public CapacityProjection Calculate()
    {
        var projections = new List<MonthlyProjection>();

        // Cumulative growth multiplier. CPU and memory are scaled by this factor directly
        // rather than by (rps / CurrentRps), which is 0/0 = NaN when CurrentRps is 0.
        double growthFactor = 1.0;

        for (int month = 1; month <= MonthsToPlan; month++)
        {
            growthFactor *= 1 + GrowthRatePerMonth;

            // Linear model: CPU scales with RPS
            double cpu = CurrentCpuPercent * growthFactor;
            // Memory scales with connections (assume constant per-connection memory)
            double memory = CurrentMemoryPercent * growthFactor;

            projections.Add(new MonthlyProjection
            {
                Month = month,
                ProjectedRps = CurrentRps * growthFactor,
                ProjectedCpu = cpu,
                ProjectedMemory = memory,
                ScalingTrigger = cpu > CpuScalingThresholdPercent || memory > MemoryScalingThresholdPercent
            });
        }

        return new CapacityProjection
        {
            Projections = projections,
            ScalingRecommendations = GenerateRecommendations(projections)
        };
    }

    // Emits one recommendation per exceeded threshold for every month flagged as a trigger;
    // a month can therefore produce both a CPU and a memory recommendation.
    private List<ScalingRecommendation> GenerateRecommendations(List<MonthlyProjection> projections)
    {
        var recommendations = new List<ScalingRecommendation>();
        foreach (var proj in projections.Where(p => p.ScalingTrigger))
        {
            if (proj.ProjectedCpu > CpuScalingThresholdPercent)
            {
                recommendations.Add(new ScalingRecommendation
                {
                    Month = proj.Month,
                    Type = "Scale Up",
                    Reason = $"CPU projected at {proj.ProjectedCpu:F0}%",
                    Action = "Increase instance size or add instances"
                });
            }
            if (proj.ProjectedMemory > MemoryScalingThresholdPercent)
            {
                recommendations.Add(new ScalingRecommendation
                {
                    Month = proj.Month,
                    Type = "Scale Memory",
                    Reason = $"Memory projected at {proj.ProjectedMemory:F0}%",
                    Action = "Increase RAM or optimize memory usage"
                });
            }
        }
        return recommendations;
    }
}
// Projected load for one month, produced by CapacityPlanningModel.Calculate().
public class MonthlyProjection
{
// 1-based month index
public int Month { get; set; }
public double ProjectedRps { get; set; }
public double ProjectedCpu { get; set; }
public double ProjectedMemory { get; set; }
// Set by Calculate() when projected CPU is above 80 or projected memory is above 85
public bool ScalingTrigger { get; set; }
}
// Result of CapacityPlanningModel.Calculate(): the monthly projections plus the
// recommendations derived from them. Both lists start out empty.
public class CapacityProjection
{
public List<MonthlyProjection> Projections { get; set; } = new();
public List<ScalingRecommendation> ScalingRecommendations { get; set; } = new();
}
// One scaling suggestion for a given month.
public class ScalingRecommendation
{
public int Month { get; set; }
// CapacityPlanningModel emits "Scale Up" (CPU) and "Scale Memory" (memory)
public string Type { get; set; } = "";
public string Reason { get; set; } = "";
public string Action { get; set; } = "";
}Auto-Scaling Configuration
# Kubernetes HPA
# Scales the api-server Deployment between 3 and 20 replicas on average CPU (70%) and
# memory (75%) utilization. Scale-up adds at most 2 pods per minute; scale-down removes
# at most 1 pod every 2 minutes after a 5-minute stabilization window.
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
  name: api-hpa
spec:
  scaleTargetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: api-server
  minReplicas: 3
  maxReplicas: 20
  metrics:
    - type: Resource
      resource:
        name: cpu
        target:
          type: Utilization
          averageUtilization: 70
    - type: Resource
      resource:
        name: memory
        target:
          type: Utilization
          averageUtilization: 75
  behavior:
    scaleUp:
      stabilizationWindowSeconds: 60
      policies:
        - type: Pods
          value: 2
          periodSeconds: 60
    scaleDown:
      stabilizationWindowSeconds: 300
      policies:
        - type: Pods
          value: 1
          periodSeconds: 120
Post-Mortem Process
Performance Incident Template
# Performance Incident Post-Mortem
## Summary
- **Date**: 2025-01-15
- **Duration**: 45 minutes
- **Impact**: P99 latency increased from 200ms to 5s
- **Severity**: P1
## Timeline
- 14:00 — Alert: P99 latency > 1s
- 14:02 — On-call engineer acknowledged
- 14:05 — Identified: GC Gen2 collections every 30 seconds
- 14:10 — Root cause: Memory leak in SessionCache
- 14:20 — Fix deployed: Added cache eviction
- 14:30 — P99 latency normalized to 200ms
## Root Cause Analysis
### What happened?
SessionCache grew unbounded — no eviction policy. After ~2 hours, LOH fragmentation caused frequent Gen2 collections.
### Why did it happen?
1. SessionCache used static List<T> without size limit
2. No monitoring on cache size
3. No eviction policy implemented
### How did we miss it?
1. Soak tests < 1 hour (leak appeared after 2 hours)
2. No alerting on LOH size
3. No performance budget for cache size
## Action Items
| Item | Owner | Due | Status |
|------|-------|-----|--------|
| Add cache eviction (LRU, max size) | @dev1 | 2025-01-17 | Done |
| Add LOH size alerting | @dev2 | 2025-01-20 | Open |
| Extend soak tests to 8 hours | @dev3 | 2025-01-22 | Open |
| Add performance budget for cache | @dev1 | 2025-01-25 | Open |
| Add dotnet-gcdump to monitoring | @dev2 | 2025-02-01 | Open |
## Prevention
1. Performance budget enforcement in CI
2. Soak tests > 4 hours for all services
3. LOH size monitoring and alerting
4. Regular capacity planning reviews
Практика
Задание 1: Performance Gate в CI
Цель: Настроить automated benchmark comparison.
Требования:
- Baseline benchmark results stored in repository
- PR triggers benchmark run
- Compare with baseline (P95, P99, allocations)
- Fail if regression > 10%
- Publish results as artifact
Задание 2: Capacity Planning Model
Цель: Growth projection и scaling recommendations.
Требования:
- Model: RPS, CPU, Memory over 12 months
- Growth rate: 10% per month
- Scaling triggers: CPU > 80%, Memory > 85%
- Recommendations: Scale Up, Scale Out, Optimize
- Kubernetes HPA configuration
Задание 3: Performance Post-Mortem
Цель: Simulated incident с action items.
Сценарий: P99 latency spike, GC Gen2 collections, memory leak.
Требования:
- Написать post-mortem по шаблону
- Root cause analysis (5 Whys)
- Action items с owners и deadlines
- Prevention measures
Контрольные вопросы
- Что такое performance budget?
- Как continuous performance testing работает в CI?
- Как проводить capacity planning?
- Что должно быть в performance post-mortem?
- Какие metrics для performance alerting?
Контрольная точка модуля 11
Overview
Цель проекта
Создать comprehensive performance engineering framework для production system, включающий:
- BenchmarkDotNet suite с CI integration и regression detection
- Comprehensive profiling workflow (CPU, memory, GC, allocations)
- Load test automation с k6 и historical trend tracking
- Performance dashboard с P95/P99 latency, throughput, error rate metrics
- Native AOT deployment option с validated functionality
Архитектура фреймворка
┌─────────────────────────────────────────────────────────┐
│ Performance Framework │
├─────────────┬──────────────┬──────────────┬─────────────┤
│ Benchmarks │ Profiling │ Load Test │ Dashboard │
│ │ │ │ │
│ BDN Suite │ dotnet-trace │ k6 Scripts │ Grafana │
│ CI Gates │ dotnet-counters│ Soak Tests │ Prometheus │
│ Regression │ dotnet-gcdump│ Spike Tests │ Alerting │
│ Detection │ dotnet-dump │ Thresholds │ Trending │
└─────────────┴──────────────┴──────────────┴─────────────┘BenchmarkDotNet Suite
Project Structure
benchmarks/
├── Benchmarks.csproj
├── Program.cs
├── Categories/
│ ├── CriticalBenchmarks.cs
│ ├── StringBenchmarks.cs
│ └── DatabaseBenchmarks.cs
├── Configs/
│ ├── ProductionConfig.cs
│ └── RegressionConfig.cs
├── Comparisons/
│ ├── DotnetVersionComparison.cs
│ └── AlgorithmComparison.cs
└── Results/
├── baseline.json
└── latest/Critical Benchmarks
// Categories/CriticalBenchmarks.cs
using BenchmarkDotNet.Attributes;
using BenchmarkDotNet.Engines;
using BenchmarkDotNet.Jobs;
/// <summary>
/// Benchmarks in the "Critical" category: buffer copies, JSON round-trips, a string search
/// and a LINQ sum.
/// </summary>
[MemoryDiagnoser]
// HardwareCounter has no L1CacheMisses / L2CacheMisses / BranchMisses members; these are
// the cache and branch counters it does define. Fully qualified because the enum lives in
// BenchmarkDotNet.Diagnosers, which this file does not import.
[HardwareCounters(
    BenchmarkDotNet.Diagnosers.HardwareCounter.CacheMisses,
    BenchmarkDotNet.Diagnosers.HardwareCounter.LlcMisses,
    BenchmarkDotNet.Diagnosers.HardwareCounter.BranchMispredictions)]
[SimpleJob(RunStrategy.Throughput, warmupCount: 5, iterationCount: 10)]
// The category attribute is BenchmarkCategory; there is no CategoriesAttribute.
[BenchmarkCategory("Critical")]
public class CriticalBenchmarks
{
    // Web defaults make property matching case-insensitive, which the lower-case keys in _json need.
    private static readonly JsonSerializerOptions WebOptions = new(JsonSerializerDefaults.Web);

    private readonly byte[] _data = new byte[1024 * 1024];
    // Separate target buffer: copying _data onto itself does not measure a real copy.
    private readonly byte[] _destination = new byte[1024 * 1024];
    private readonly string _json = """{"id":1,"name":"test","value":42.0}""";

    /// <summary>Shape of the JSON document in <c>_json</c>.</summary>
    public sealed record Payload(int Id, string Name, double Value);

    [GlobalSetup]
    public void Setup()
    {
        // Fixed seed keeps the buffer contents identical between runs.
        new Random(42).NextBytes(_data);
    }

    [Benchmark(Baseline = true)]
    public void MemoryCopy() => Buffer.BlockCopy(_data, 0, _destination, 0, _data.Length);

    [Benchmark]
    public void SpanCopy() => _data.AsSpan().CopyTo(_destination);

    [Benchmark]
    public string Serialize() => JsonSerializer.Serialize(new { id = 1, name = "test", value = 42.0 });

    // Returns the deserialized object: the original declared `string` but deserialized into
    // DynamicObject, which cannot compile and is not a shape System.Text.Json can populate.
    [Benchmark]
    public Payload? Deserialize() => JsonSerializer.Deserialize<Payload>(_json, WebOptions);

    [Benchmark]
    public int StringIndexOf() => "hello world 2024".IndexOf('w');

    // 0..9_999 sums to 49_995_000. The previous range (0..99_999) sums to 4_999_950_000,
    // which exceeds int.MaxValue and makes Enumerable.Sum throw OverflowException.
    [Benchmark]
    public void LinqSum() => Enumerable.Range(0, 10_000).Sum();
}
CI Regression Config
// Configs/RegressionConfig.cs
using BenchmarkDotNet.Configs;
using BenchmarkDotNet.Diagnosers;
using BenchmarkDotNet.Exporters;
using BenchmarkDotNet.Exporters.Csv;
using BenchmarkDotNet.Jobs;
using BenchmarkDotNet.Loggers;
using BenchmarkDotNet.Reports;
using BenchmarkDotNet.Running;
/// <summary>
/// Short BenchmarkDotNet configuration for CI: one launch, 3 warmup and 5 measured
/// iterations, memory diagnostics, CSV and GitHub-markdown exports, and validators that
/// fail the run on broken benchmarks or non-optimized builds.
/// </summary>
public class RegressionConfig : ManualConfig
{
    public RegressionConfig()
    {
        // Fast execution for CI.
        // The strategy is set with WithStrategy(); Job has no WithRun() method. RunStrategy is
        // fully qualified because BenchmarkDotNet.Engines is not imported in this file.
        AddJob(Job.Default
            .WithStrategy(BenchmarkDotNet.Engines.RunStrategy.Monitoring)
            .WithWarmupCount(3)
            .WithIterationCount(5)
            .WithLaunchCount(1)
            .WithId("CI-Fast"));

        // Memory diagnostics
        AddDiagnoser(MemoryDiagnoser.Default);

        // CSV export for CI parsing
        AddExporter(CsvMeasurementsExporter.Default);
        AddExporter(MarkdownExporter.GitHub);

        // Validators (namespace BenchmarkDotNet.Validators, also not imported here).
        // ExecutionValidator exposes FailOnError / DontFailOnError; it has no Default member.
        AddValidator(BenchmarkDotNet.Validators.ExecutionValidator.FailOnError);
        AddValidator(BenchmarkDotNet.Validators.JitOptimizationsValidator.FailOnError);
    }
}
Baseline Comparison
// Comparisons/DotnetVersionComparison.cs
using BenchmarkDotNet.Attributes;
using BenchmarkDotNet.Jobs;
/// <summary>
/// Runs the same benchmarks on .NET 8 (baseline) and .NET 9 to compare runtime versions.
/// </summary>
// SimpleJob has no (RuntimeMoniker, RunStrategy) overload. Throughput is the default
// strategy, so only the runtime is specified.
[SimpleJob(RuntimeMoniker.Net80, baseline: true)]
[SimpleJob(RuntimeMoniker.Net90)]
[MemoryDiagnoser]
// The category attribute is BenchmarkCategory; there is no CategoriesAttribute.
[BenchmarkCategory("Comparison")]
public class DotnetVersionComparison
{
    // 100_000 values in 0..999; they sum to 49_950_000, which fits in an int.
    // The previous contents (0..99_999) sum to 4_999_950_000 and overflow Enumerable.Sum.
    private readonly int[] _data = Enumerable.Range(0, 100000).Select(i => i % 1000).ToArray();

    [Benchmark]
    public int LinqSum() => _data.Sum();

    /// <summary>Sums <c>_data</c> in parallel using one partial sum per worker.</summary>
    [Benchmark]
    public int ParallelSum()
    {
        long sum = 0;
        // Every index is visited exactly once. The previous version started a strided inner
        // loop from each index, so most elements were added many times and the work was
        // quadratic.
        Parallel.For(
            0,
            _data.Length,
            () => 0L,
            (i, _, local) => local + _data[i],
            local => Interlocked.Add(ref sum, local));
        return (int)sum;
    }

    [Benchmark]
    public string StringJoin() => string.Join(",", _data.Take(1000));
}
Profiling Workflow
Automated Profiling Script
# scripts/profile.ps1
# Collects a CPU trace, runtime counters, a GC heap dump and a full process dump from a
# running .NET process and writes everything to $OutputDir.
# Requires the dotnet-trace, dotnet-counters, dotnet-gcdump and dotnet-dump tools.
param(
    [int]$ProcessId,
    [string]$Duration = "00:02:00",
    [string]$OutputDir = "./profiling-results"
)

if ($ProcessId -le 0) {
    throw "ProcessId must be the id of a running .NET process."
}

New-Item -ItemType Directory -Force -Path $OutputDir | Out-Null

Write-Host "=== CPU Profiling ===" -ForegroundColor Cyan
dotnet-trace collect -p $ProcessId --profile cpu-sampling --duration $Duration -o "$OutputDir/cpu.nettrace"
# dotnet-trace has no flame-graph report; convert to Speedscope and open the file in
# https://www.speedscope.app to get the flame graph view.
dotnet-trace convert "$OutputDir/cpu.nettrace" --format Speedscope -o "$OutputDir/cpu.speedscope.json"

Write-Host "`n=== GC Metrics ===" -ForegroundColor Cyan
# `collect` (not `monitor`) is the command that writes to a file.
# --refresh-interval is in seconds.
dotnet-counters collect -p $ProcessId --counters System.Runtime,Microsoft.AspNetCore.Server.Kestrel --refresh-interval 1 --format csv -o "$OutputDir/gc-metrics.csv" --duration $Duration

Write-Host "`n=== GC Dump ===" -ForegroundColor Cyan
dotnet-gcdump collect -p $ProcessId -o "$OutputDir/heap.gcdump"
# `report` prints heap statistics to stdout; redirect them into the report file.
dotnet-gcdump report "$OutputDir/heap.gcdump" | Out-File "$OutputDir/gcdump-report.txt"

Write-Host "`n=== Heap Analysis ===" -ForegroundColor Cyan
dotnet-dump collect -p $ProcessId -o "$OutputDir/dump.dmp"
# Commands are given without the WinDbg "!" prefix; the trailing "exit" leaves the
# interactive shell so the script can continue.
dotnet-dump analyze "$OutputDir/dump.dmp" -c "dumpheap -stat -type System.Byte[]" -c "finalizequeue" -c "clrthreads" -c "exit" | Out-File "$OutputDir/dump-report.txt"

Write-Host "`n=== Results saved to $OutputDir ===" -ForegroundColor Green
Profiling Checklist
## Profiling Checklist
### CPU Profiling
- [ ] Identify hot path via flame graph
- [ ] Check JIT inlining (COMPlus_JitDump=*)
- [ ] Verify SIMD usage for bulk operations
- [ ] Check for sync-over-async patterns
### Memory Profiling
- [ ] Check allocation rate (dotnet-counters)
- [ ] Analyze GC generations (dotnet-gcdump)
- [ ] Find retention paths (dotnet-gcdump --roots)
- [ ] Check LOH fragmentation
- [ ] Verify object pooling for frequent allocations
### GC Analysis
- [ ] Gen 0 collection frequency
- [ ] Gen 2 collection frequency and pause time
- [ ] LOH size over time
- [ ] Finalizer queue length
- [ ] GC mode (Workstation vs Server)
### Thread Analysis
- [ ] Thread pool queue length
- [ ] Thread count
- [ ] Blocked threads
- [ ] Sync-over-async detection
Load Test Automation
k6 Load Test Suite
// load-tests/api-load-test.js
import http from 'k6/http';
import { check, sleep } from 'k6';
import { Rate, Trend, Counter } from 'k6/metrics';
// Custom metrics
const apiLatency = new Trend('api_latency');
const apiErrors = new Rate('api_errors');
const ordersTotal = new Counter('orders_total');
// k6 run options: ramping stages plus pass/fail thresholds.
// Note: the stages run back to back, so the final 4h soak stage makes one run of this
// script last about 4 hours 10 minutes.
export const options = {
stages: [
{ duration: '1m', target: 50 }, // Warm-up
{ duration: '5m', target: 100 }, // Steady state
{ duration: '1m', target: 200 }, // Spike
{ duration: '2m', target: 200 }, // Spike hold
{ duration: '1m', target: 100 }, // Ramp down
{ duration: '4h', target: 100 }, // Soak test
],
// Thresholds are in milliseconds for the latency metrics and a fraction for api_errors.
thresholds: {
'api_latency': [
'p(50)<100',
'p(90)<300',
'p(95)<500',
'p(99)<1000',
],
'api_errors': ['rate<0.01'],
'http_req_duration': ['p(99)<1000'],
},
};
const BASE_URL = __ENV.BASE_URL || 'http://localhost:5000';
// Logs in with the fixed test account and returns the `token` field of the JSON response.
function login() {
  const credentials = JSON.stringify({
    username: 'testuser',
    password: 'testpass',
  });
  const params = {
    headers: { 'Content-Type': 'application/json' },
  };

  const res = http.post(`${BASE_URL}/api/auth/login`, credentials, params);

  check(res, {
    'login successful': (r) => r.status === 200,
    'login < 500ms': (r) => r.timings.duration < 500,
  });

  return res.json('token');
}
// Fetches the first product page, records its latency and whether the request failed.
function browseProducts(token) {
  const res = http.get(`${BASE_URL}/api/products?page=1&pageSize=20`, {
    headers: { 'Authorization': `Bearer ${token}` },
  });
  apiLatency.add(res.timings.duration);
  check(res, {
    'products loaded': (r) => r.status === 200,
    'products < 500ms': (r) => r.timings.duration < 500,
    'has products': (r) => {
      // An error response may not be JSON; a parse failure fails the check instead of
      // throwing and aborting the iteration.
      try {
        const body = JSON.parse(r.body);
        return body != null && body.length > 0;
      } catch (_) {
        return false;
      }
    },
  });
  // res.error is only set for transport-level failures, so HTTP 4xx/5xx responses used to
  // count as successes. Status 0 means no response was received.
  apiErrors.add(res.status === 0 || res.status >= 400);
}
// Places an order for a random product (id 1-100, quantity 1-5), records latency,
// counts created orders and tracks whether the request failed.
function placeOrder(token) {
  const res = http.post(`${BASE_URL}/api/orders`, JSON.stringify({
    productId: Math.floor(Math.random() * 100) + 1,
    quantity: Math.floor(Math.random() * 5) + 1,
  }), {
    headers: {
      'Content-Type': 'application/json',
      'Authorization': `Bearer ${token}`,
    },
  });
  apiLatency.add(res.timings.duration);
  // Run the checks for every response: when they only ran after a 201, a failed order
  // could never fail the 'order created' check.
  check(res, {
    'order created': (r) => r.status === 201,
    'order < 1s': (r) => r.timings.duration < 1000,
  });
  if (res.status === 201) {
    ordersTotal.add(1);
  }
  // res.error is only set for transport-level failures; count HTTP 4xx/5xx as errors too.
  apiErrors.add(res.status === 0 || res.status >= 400);
}
export default function () {
const token = login();
// 80% browse, 20% order
for (let i = 0; i < 5; i++) {
browseProducts(token);
sleep(0.5, 2);
}
if (Math.random() < 0.2) {
placeOrder(token);
}
sleep(1, 3);
}Soak Test Configuration
// load-tests/soak-test.js
import http from 'k6/http';
import { check, sleep } from 'k6';
// Soak profile: 15 min ramp to 100 VUs, 4 h hold, 15 min ramp down.
// Note: this snippet defines options only; no default function is shown here.
export const options = {
stages: [
{ duration: '15m', target: 100 },
{ duration: '4h', target: 100 },
{ duration: '15m', target: 0 },
],
// p(95) of request duration under 500 ms and fewer than 1% failed requests
thresholds: {
'http_req_duration': ['p(95)<500'],
'http_req_failed': ['rate<0.01'],
},
};
// Monitor for degradation patterns:
// - Memory growth (heap size over time)
// - Latency increase over time
// - Error rate increase over time
// - Connection pool exhaustionPerformance Dashboard
Prometheus Metrics
// Monitoring/PerformanceMetrics.cs
using Prometheus;
/// <summary>
/// Prometheus metrics for the API: request duration and allocation histograms, request and
/// error counters, and gauges for GC counts, heap size and CPU usage.
/// </summary>
/// <remarks>
/// Metric names are kept exactly as the Grafana dashboard in this document queries them.
/// </remarks>
public class PerformanceMetrics
{
    // HistogramConfiguration is configured through its LabelNames and Buckets properties;
    // it has no fluent WithLabels / WithExponentialBuckets methods.
    private static readonly Histogram RequestDuration = Metrics.CreateHistogram(
        "api_request_duration_seconds",
        "API request duration in seconds",
        new HistogramConfiguration
        {
            LabelNames = new[] { "endpoint", "method" },
            // 10 buckets starting at 1 ms, doubling each time
            Buckets = Histogram.ExponentialBuckets(0.001, 2, 10)
        });

    private static readonly Histogram AllocationSize = Metrics.CreateHistogram(
        "api_allocation_size_bytes",
        "Allocation size per request in bytes",
        new HistogramConfiguration
        {
            LabelNames = new[] { "endpoint" },
            // 14 buckets starting at 1 KB, doubling each time
            Buckets = Histogram.ExponentialBuckets(1024, 2, 14)
        });

    private static readonly Counter RequestCount = Metrics.CreateCounter(
        "api_request_total",
        "Total API requests");

    private static readonly Counter ErrorCount = Metrics.CreateCounter(
        "api_error_total",
        "Total API errors");

    private static readonly Gauge GcGen0Count = Metrics.CreateGauge(
        "dotnet_gc_gen0_collections_total",
        "GC Gen 0 collection count");

    private static readonly Gauge GcGen2Count = Metrics.CreateGauge(
        "dotnet_gc_gen2_collections_total",
        "GC Gen 2 collection count");

    private static readonly Gauge HeapSize = Metrics.CreateGauge(
        "dotnet_gc_heap_size_bytes",
        "GC heap size in bytes");

    private static readonly Gauge CpuUsage = Metrics.CreateGauge(
        "dotnet_cpu_usage_percent",
        "CPU usage percent");

    /// <summary>Records one finished request.</summary>
    /// <param name="endpoint">Value for the <c>endpoint</c> label.</param>
    /// <param name="method">Value for the <c>method</c> label.</param>
    /// <param name="durationSeconds">Request duration in seconds.</param>
    /// <param name="allocationBytes">Bytes attributed to the request.</param>
    /// <param name="isError">Whether the request also counts as an error.</param>
    public static void TrackRequest(string endpoint, string method, double durationSeconds, long allocationBytes, bool isError)
    {
        RequestDuration.WithLabels(endpoint, method).Observe(durationSeconds);
        AllocationSize.WithLabels(endpoint).Observe(allocationBytes);
        RequestCount.Inc();
        if (isError) ErrorCount.Inc();
    }

    /// <summary>Publishes the current Gen0/Gen2 collection counts and managed heap size.</summary>
    public static void TrackGcMetrics()
    {
        GcGen0Count.Set(GC.CollectionCount(0));
        GcGen2Count.Set(GC.CollectionCount(2));
        HeapSize.Set(GC.GetTotalMemory(false));
    }

    /// <summary>Publishes the value returned by <see cref="GetCpuUsage"/>.</summary>
    public static void TrackCpuUsage()
    {
        var cpuPercent = GetCpuUsage();
        CpuUsage.Set(cpuPercent);
    }

    // Placeholder: always returns 0 until a platform-specific implementation is supplied,
    // so the dotnet_cpu_usage_percent gauge stays at 0.
    private static double GetCpuUsage()
    {
        // Implementation depends on platform
        return 0;
    }
}
Grafana Dashboard JSON
{
"dashboard": {
"title": "API Performance Dashboard",
"panels": [
{
  "title": "Request Latency (P50/P95/P99)",
  "type": "graph",
  "targets": [
    {
      "expr": "histogram_quantile(0.50, sum by (le) (rate(api_request_duration_seconds_bucket[5m])))",
      "legendFormat": "P50"
    },
    {
      "expr": "histogram_quantile(0.95, sum by (le) (rate(api_request_duration_seconds_bucket[5m])))",
      "legendFormat": "P95"
    },
    {
      "expr": "histogram_quantile(0.99, sum by (le) (rate(api_request_duration_seconds_bucket[5m])))",
      "legendFormat": "P99"
    }
  ]
},
{
"title": "Throughput (RPS)",
"type": "graph",
"targets": [
{
"expr": "rate(api_request_total[5m])"
}
]
},
{
"title": "Error Rate",
"type": "graph",
"targets": [
{
"expr": "rate(api_error_total[5m]) / rate(api_request_total[5m])"
}
]
},
{
"title": "GC Gen 0/2 Collections",
"type": "graph",
"targets": [
{
"expr": "dotnet_gc_gen0_collections_total"
},
{
"expr": "dotnet_gc_gen2_collections_total"
}
]
},
{
"title": "Heap Size",
"type": "graph",
"targets": [
{
"expr": "dotnet_gc_heap_size_bytes"
}
]
},
{
"title": "CPU Usage",
"type": "graph",
"targets": [
{
"expr": "dotnet_cpu_usage_percent"
}
]
}
]
}
}
Native AOT Deployment
AOT Publish Pipeline
#!/bin/bash
# scripts/publish-aot.sh
#
# Builds the project, runs a Native AOT publish with the AOT analyzer enabled to
# surface compatibility warnings, then produces the final trimmed AOT binary and
# smoke-tests it.

# -e: stop on the first failing command; -u: treat unset variables as errors;
# pipefail: a failing `dotnet publish` must not be masked by `tee` below.
set -euo pipefail

echo "=== Publishing Native AOT ==="

# Build
dotnet build -c Release

# AOT analysis
echo "=== AOT Analysis ==="
mkdir -p ./aot-analysis
# Trim/AOT compatibility problems are reported as ILxxxx warnings in the build
# output (no JSON report is written), so keep a copy of the log to inspect.
dotnet publish -c Release -r linux-x64 /p:PublishAot=true /p:EnableAotAnalyzer=true -o ./aot-analysis \
    | tee ./aot-analysis/publish.log

# Check for AOT warnings
if grep -Eq "warning IL[0-9]+" ./aot-analysis/publish.log; then
    echo "AOT warnings detected - review ./aot-analysis/publish.log"
    grep -E "warning IL[0-9]+" ./aot-analysis/publish.log
fi

# Trimmed AOT publish
echo "=== Trimmed AOT Publish ==="
# TrimMode=full is the current name of the legacy `link` mode.
dotnet publish -c Release -r linux-x64 \
    /p:PublishAot=true \
    /p:PublishTrimmed=true \
    /p:TrimMode=full \
    /p:InvariantGlobalization=true \
    -o ./dist/aot

# Verify
echo "=== Verification ==="
ls -lh ./dist/aot
./dist/aot/myapp --health
echo "=== AOT Publish Complete ==="
AOT Compatibility Checklist
## AOT Compatibility Checklist
### Reflection
- [ ] No dynamic method generation
- [ ] No RuntimeTypeHandle usage
- [ ] No Activator.CreateInstance with unknown types
- [ ] Use source generators for serialization
### Dynamic Code
- [ ] No Emit libraries (System.Reflection.Emit)
- [ ] No DynamicMethod
- [ ] No MethodBase.GetMethodFromHandle
### Dependencies
- [ ] All dependencies support AOT
- [ ] No native libraries without AOT support
- [ ] Globalization: InvariantGlobalization enabled
### Testing
- [ ] All endpoints functional after AOT publish
- [ ] Serialization/deserialization works
- [ ] Background tasks work
- [ ] Health checks pass
Критерии прохождения
Checklist
- [ ] CI fails на >10% performance regression в critical benchmarks
  - BenchmarkDotNet suite с baseline comparison
  - GitHub Actions workflow с regression detection
  - Auto-comment on PR with benchmark results
- [ ] All P99 latencies within defined budget per endpoint
  - Performance budget middleware
  - Grafana dashboard with P99 tracking
  - Alerting on budget violation
- [ ] Memory allocation rate < 1MB/sec под steady-state load
  - dotnet-counters monitoring
  - Allocation tracking middleware
  - Object pooling for hot paths
- [ ] Successful identification и resolution of simulated performance incident
  - Simulated: memory leak + GC pause spike
  - Use profiling tools to identify root cause
  - Fix and verify improvement
- [ ] Documented capacity plan с scaling triggers и growth projections
  - Capacity planning model (12 months)
  - Scaling recommendations
  - Kubernetes HPA configuration
Оценка
| Критерий | Вес | Оценка |
|---|---|---|
| BDN suite с CI integration | 20% | |
| Profiling workflow (CPU, memory, GC) | 20% | |
| Load test automation с k6 | 15% | |
| Performance dashboard | 15% | |
| Native AOT deployment | 10% | |
| Capacity planning | 10% | |
| Post-mortem documentation | 10% | |
| Итого | 100% | |
Дополнительные ресурсы
- [BenchmarkDotNet Documentation](https://benchmarkdotnet.org/articles/guides/getting-started.html)
- [k6 Documentation](https://k6.io/docs/)
- [Prometheus Documentation](https://prometheus.io/docs/)
- [Grafana Documentation](https://grafana.com/docs/)
- [Native AOT Documentation](https://learn.microsoft.com/en-us/dotnet/core/deploying/native-aot/)
- [.NET Performance Best Practices](https://learn.microsoft.com/en-us/dotnet/standard/performance)