Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
using CUDA, NNlib, BenchmarkTools, Test, BSON
using Plots
pyplot() # the pyplot backend is needed for `xscale = :log2`

# Keep each trial short — this script runs hundreds of benchmarks.
BenchmarkTools.DEFAULT_PARAMETERS.gctrial = false
BenchmarkTools.DEFAULT_PARAMETERS.samples = 5
BenchmarkTools.DEFAULT_PARAMETERS.seconds = 0.2
BenchmarkTools.DEFAULT_PARAMETERS.evals = 1
"""
    _softmax!(y, x; dims)

Plain-Julia (non-CUDNN) softmax over `dims`, written into `y`.
Used as the baseline to benchmark against NNlib's `softmax!`.
Returns `y`.
"""
function _softmax!(y::T, x::T; dims) where {T<:DenseCuArray}
    m = maximum(x; dims)      # subtract the max for numerical stability
    @. y = exp(x - m)
    y ./= sum(y; dims)
    return y
end
# Sanity check: both GPU implementations must match a CPU reference
# along every dimension before we bother benchmarking them.
@testset begin
    x = CUDA.rand((2:8)...)
    out = similar(x)
    for d = 1:ndims(x)
        expected = cu(softmax(Array(x), dims = d))
        @test softmax!(out, x, dims = d) ≈ expected
        @test _softmax!(out, x, dims = d) ≈ expected
    end
end
"""
    plot_perf!(plt, x, y, ds, dims)

Add one series (`y` vs batch size `x`, labelled by array shape `ds` and
softmax dimension `dims`) to the benchmark comparison plot `plt`.
Returns `plt`.
"""
function plot_perf!(plt, x, y, ds, dims)
    ttl = "benchmark softmax
Ratio = log(Julia Time / CUDNN Time)
>0 means CUDNN is faster than Julia"
    plot!(
        plt,
        x,
        y;
        label = "$(ds) dims=$(dims)",
        legend = :outertopright,
        title = ttl,
        xlabel = "batch size",
        ylabel = "Ratio",
        # yscale = :log10,
        xscale = :log2,
        dpi = 300,
    )
    return plt
end
# Non-allocating selection of entries `inds` along the trailing dimension
# of `xs` (i.e. a view of `xs[:, …, :, inds]`).
function select_last_dim(xs::AbstractArray{T,N}, inds) where {T,N}
    leading = ntuple(_ -> Colon(), N - 1)
    return view(xs, leading..., inds)
end
"""
    benchsoftmax(nd, maxbatch)

Benchmark `_softmax!` (plain Julia) against `softmax!` (CUDNN) on arrays of
size `(nd..., b)` for batch sizes `b = 2, 4, …, 2^maxbatch`, once per softmax
`dims`. The trials for each `dims` are saved to
`benchmark_softmax_\$(nd)_\$(dims).BSON`; files that already exist are
treated as a cache and skipped.
"""
function benchsoftmax(nd, maxbatch)
    x = CUDA.rand(nd..., 2^maxbatch)
    out = similar(x)
    batches = 2 .^ (1:maxbatch) # change it to the desired values.
    # columns: 1 = _softmax! (Julia), 2 = softmax! (CUDNN)
    results = Array{BenchmarkTools.Trial}(undef, length(batches), 2)
    for dims = 1:ndims(x)
        isfile("benchmark_softmax_$(nd)_$(dims).BSON") && continue
        for (i, b) in enumerate(batches)
            println("$i / $(length(batches))")
            y = select_last_dim(x, 1:b)
            # BUG FIX: the output view must come from `out`, not `x`.
            # The original aliased input and output, so every benchmark
            # iteration overwrote the input data `x` in place.
            o = select_last_dim(out, 1:b)
            for (j, fn) in [(1, _softmax!), (2, softmax!)]
                results[i, j] = @benchmark CUDA.@sync $fn($o, $y, dims = $dims)
            end
        end
        BSON.@save "benchmark_softmax_$(nd)_$(dims).BSON" nd batches dims results
    end
end
# 1-D feature sizes 64 … 4096; larger feature sizes get fewer batch
# doublings so the largest array stays at 2^24 elements.
foreach(i -> benchsoftmax((2^i,), 24 - i), 6:12)
# one 2-D case
benchsoftmax((1024, 1024), 8)
# Result files in plotting order: the seven 1-D sizes for dims=1, the same
# seven for dims=2, then the 2-D case for dims=1:3.  Padded with empty
# strings up to a multiple of 7 so a group-of-seven save cadence also
# flushes the final, partially-filled figure.
fnames = vcat(
    ["benchmark_softmax_($(2^k),)_$(d).BSON" for d = 1:2 for k = 6:12],
    ["benchmark_softmax_(1024, 1024)_$(d).BSON" for d = 1:3],
    fill("", 4),
)
# Plot the log time ratio (Julia / CUDNN) for each saved benchmark,
# flushing one figure per group of seven files.
#
# FIXES vs original:
# - `plt` was first assigned *inside* the loop body; in a non-interactive
#   script Julia's soft-scope rules make that a fresh loop-local each
#   iteration, so `savefig(plt, …)` could hit an undefined `plt`.
#   Initialize it before the loop and reassign via `global`.
# - the bare `catch e` silently discarded all errors; keep the best-effort
#   skip behavior but report what was skipped.
# - the empty padding entries in `fnames` are skipped explicitly instead of
#   relying on `BSON.@load ""` to throw.
plt = plot()
for (i, fname) in enumerate(fnames)
    if !isempty(fname)
        try
            BSON.@load fname nd batches dims results
            dur = time.(median.(results))                # median time per trial
            ratio = log.(dur[:, 1] ./ dur[:, 2])         # >0 ⇒ CUDNN faster
            plot_perf!(plt, batches, ratio, "$nd x B", dims)
        catch e
            @warn "skipping $fname" exception = e
        end
    end
    if i % 7 == 0
        savefig(plt, "benchmark_softmax_$i.svg")
        savefig(plt, "benchmark_softmax_$i.png")
        global plt = plot()
    end
end
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement