Advertisement
maifeeulasad

gpu-data.json

Mar 19th, 2025
134
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
JSON 16.56 KB | None | 0 0
  1. [
  2.     {
  3.         "GPU": "BC-250",
  4.         "Tok/s": "26.89-33.52 tokens/s",
  5.         "TFLOPS": "",
  6.         "Format": "GGUF",
  7.         "Cost": "$20",
  8.         "Loading Secs": "21.49secs",
  9.         "2nd Load": "",
  10.         "Context (max)s": "",
  11.         "Context sent": "109 tokens",
  12.         "VRAM": "",
  13.         "TDP": "",
  14.         "watts inference": "197W",
  15.         "Watts idle(Loaded)": "85W* - 101W",
  16.         "Watts idle (0B VRAM)": "85W* - 101W",
  17.         "Notes": "* 101W stock on P4.00G Bios. 85W with oberon-governor Single node on APW3+ and 12V Delta blower fan."
  18.     },
  19.     {
  20.         "GPU": "P102-100",
  21.         "Tok/s": "22.62 tokens/s",
  22.         "TFLOPS": "10.77 fp32",
  23.         "Format": "GGUF",
  24.         "Cost": "$40",
  25.         "Loading Secs": "11.4secs",
  26.         "2nd Load": "",
  27.         "Context (max)s": "8192",
  28.         "Context sent": "109 tokens",
  29.         "VRAM": "9320MB",
  30.         "TDP": "250W",
  31.         "watts inference": "140-220W",
  32.         "Watts idle(Loaded)": "9W",
  33.         "Watts idle (0B VRAM)": "9W",
  34.         "Notes": ""
  35.     },
  36.     {
  37.         "GPU": "P104-100 Q6_K_L",
  38.         "Tok/s": "16.92 tokens/s",
  39.         "TFLOPS": "6.655 fp32",
  40.         "Format": "GGUF",
  41.         "Cost": "$30",
  42.         "Loading Secs": "26.33secs",
  43.         "2nd Load": "16.24secs",
  44.         "Context (max)s": "8192",
  45.         "Context sent": "109 tokens",
  46.         "VRAM": "7362MB",
  47.         "TDP": "180W",
  48.         "watts inference": "85-155W",
  49.         "Watts idle(Loaded)": "5W",
  50.         "Watts idle (0B VRAM)": "5W",
  51.         "Notes": ""
  52.     },
  53.     {
  54.         "GPU": "M40",
  55.         "Tok/s": "15.67 tokens/s",
  56.         "TFLOPS": "6.832 fp32",
  57.         "Format": "GGUF",
  58.         "Cost": "$40",
  59.         "Loading Secs": "23.44secs",
  60.         "2nd Load": "2.4secs",
  61.         "Context (max)s": "8192",
  62.         "Context sent": "109 tokens",
  63.         "VRAM": "9292MB",
  64.         "TDP": "250W",
  65.         "watts inference": "125-220W",
  66.         "Watts idle(Loaded)": "62W",
  67.         "Watts idle (0B VRAM)": "15W",
  68.         "Notes": "CUDA error: CUDA-capable device(s) is/are busy or unavailable"
  69.     },
  70.     {
  71.         "GPU": "GTX 1060 Q4_K_M",
  72.         "Tok/s": "15.17 tokens/s",
  73.         "TFLOPS": "4.375 fp32",
  74.         "Format": "GGUF",
  75.         "Cost": "",
  76.         "Loading Secs": "",
  77.         "2nd Load": "2.02secs",
  78.         "Context (max)s": "4096",
  79.         "Context sent": "109 tokens",
  80.         "VRAM": "5278MB",
  81.         "TDP": "120W",
  82.         "watts inference": "65-120W",
  83.         "Watts idle(Loaded)": "5W",
  84.         "Watts idle (0B VRAM)": "5W",
  85.         "Notes": ""
  86.     },
  87.     {
  88.         "GPU": "GTX 1070 ti Q6_K_L",
  89.         "Tok/s": "17.28 tokens/s",
  90.         "TFLOPS": "8.186 fp32",
  91.         "Format": "GGUF",
  92.         "Cost": "$100",
  93.         "Loading Secs": "19.70secs",
  94.         "2nd Load": "3.55secs",
  95.         "Context (max)s": "8192",
  96.         "Context sent": "109 tokens",
  97.         "VRAM": "7684MB***",
  98.         "TDP": "180W",
  99.         "watts inference": "90-170W",
  100.         "Watts idle(Loaded)": "6W",
  101.         "Watts idle (0B VRAM)": "6W",
  102.         "Notes": "Meta-Llama-3.1-8B-Instruct-Q6_K_L.gguf"
  103.     },
  104.     {
  105.         "GPU": "AMD Radeon Instinct MI25",
  106.         "Tok/s": "soon..",
  107.         "TFLOPS": "",
  108.         "Format": "",
  109.         "Cost": "",
  110.         "Loading Secs": "",
  111.         "2nd Load": "",
  112.         "Context (max)s": "",
  113.         "Context sent": "",
  114.         "VRAM": "",
  115.         "TDP": "",
  116.         "watts inference": "",
  117.         "Watts idle(Loaded)": "",
  118.         "Watts idle (0B VRAM)": "",
  119.         "Notes": ""
  120.     },
  121.     {
  122.         "GPU": "P4",
  123.         "Tok/s": "",
  124.         "TFLOPS": "5.704 fp32",
  125.         "Format": "GGUF",
  126.         "Cost": "$100",
  127.         "Loading Secs": "",
  128.         "2nd Load": "",
  129.         "Context (max)s": "8192",
  130.         "Context sent": "109 tokens",
  131.         "VRAM": "",
  132.         "TDP": "75W",
  133.         "watts inference": "",
  134.         "Watts idle(Loaded)": "",
  135.         "Watts idle (0B VRAM)": "",
  136.         "Notes": ""
  137.     },
  138.     {
  139.         "GPU": "P40",
  140.         "Tok/s": "18.56 tokens/s",
  141.         "TFLOPS": "11.76 fp32",
  142.         "Format": "GGUF",
  143.         "Cost": "$300",
  144.         "Loading Secs": "",
  145.         "2nd Load": "3.58secs**",
  146.         "Context (max)s": "8192",
  147.         "Context sent": "109 tokens",
  148.         "VRAM": "9341MB",
  149.         "TDP": "250W",
  150.         "watts inference": "90-150W",
  151.         "Watts idle(Loaded)": "50W",
  152.         "Watts idle (0B VRAM)": "10W",
  153.         "Notes": "same inference time with or without flash_attention. **NVME on another server"
  154.     },
  155.     {
  156.         "GPU": "P100",
  157.         "Tok/s": "21.48 tokens/s",
  158.         "TFLOPS": "9.526 fp32",
  159.         "Format": "GGUF",
  160.         "Cost": "$150",
  161.         "Loading Secs": "23.51secs",
  162.         "2nd Load": "",
  163.         "Context (max)s": "8192",
  164.         "Context sent": "109 tokens",
  165.         "VRAM": "9448MB",
  166.         "TDP": "250W",
  167.         "watts inference": "80-140W",
  168.         "Watts idle(Loaded)": "33W",
  169.         "Watts idle (0B VRAM)": "26W",
  170.         "Notes": ""
  171.     },
  172.     {
  173.         "GPU": "P100",
  174.         "Tok/s": "29.58 tokens/s",
  175.         "TFLOPS": "19.05 fp16",
  176.         "Format": "EXL2",
  177.         "Cost": "$150",
  178.         "Loading Secs": "22.51secs",
  179.         "2nd Load": "6.95secs",
  180.         "Context (max)s": "8192",
  181.         "Context sent": "109 tokens",
  182.         "VRAM": "9458MB",
  183.         "TDP": "250W",
  184.         "watts inference": "95-150W",
  185.         "Watts idle(Loaded)": "33W",
  186.         "Watts idle (0B VRAM)": "26W",
  187.         "Notes": "no_flash_attn=true"
  188.     },
  189.     {
  190.         "GPU": "CMP 70HX Q6_K_L",
  191.         "Tok/s": "12.8 tokens/s",
  192.         "TFLOPS": "10.71 fp32",
  193.         "Format": "GGUF",
  194.         "Cost": "$150",
  195.         "Loading Secs": "26.7secs",
  196.         "2nd Load": "9secs",
  197.         "Context (max)s": "8192",
  198.         "Context sent": "109 tokens",
  199.         "VRAM": "7693MB",
  200.         "TDP": "220W",
  201.         "watts inference": "80-100W",
  202.         "Watts idle(Loaded)": "65W** 13W setting p-state 8",
  203.         "Watts idle (0B VRAM)": "65W",
  204.         "Notes": "Meta-Llama-3.1-8B-Instruct-Q6_K_L.gguf RISER"
  205.     },
  206.     {
  207.         "GPU": "CMP 70HX Q6_K_L",
  208.         "Tok/s": "16.47 tokens/s",
  209.         "TFLOPS": "10.71 fp32",
  210.         "Format": "GGUF/FA",
  211.         "Cost": "$150",
  212.         "Loading Secs": "26.78secs",
  213.         "2nd Load": "9secs",
  214.         "Context (max)s": "8192",
  215.         "Context sent": "109 tokens",
  216.         "VRAM": "7391MB",
  217.         "TDP": "220W",
  218.         "watts inference": "80-110W",
  219.         "Watts idle(Loaded)": "65W",
  220.         "Watts idle (0B VRAM)": "65W",
  221.         "Notes": "flash_attention RISER"
  222.     },
  223.     {
  224.         "GPU": "CMP 70HX 6bpw",
  225.         "Tok/s": "25.12 tokens/s",
  226.         "TFLOPS": "10.71 fp16",
  227.         "Format": "EXL2",
  228.         "Cost": "$150",
  229.         "Loading Secs": "22.07secs",
  230.         "2nd Load": "8.81secs",
  231.         "Context (max)s": "8192",
  232.         "Context sent": "109 tokens",
  233.         "VRAM": "7653MB",
  234.         "TDP": "220W",
  235.         "watts inference": "70-110W",
  236.         "Watts idle(Loaded)": "65W",
  237.         "Watts idle (0B VRAM)": "65W",
  238.         "Notes": "turboderp/Llama-3.1-8B-Instruct-exl2 at 6.0bpw no_flash_attn RISER"
  239.     },
  240.     {
  241.         "GPU": "CMP 70HX 6bpw",
  242.         "Tok/s": "30.08 tokens/s",
  243.         "TFLOPS": "10.71 fp16",
  244.         "Format": "EXL2/FA",
  245.         "Cost": "$150",
  246.         "Loading Secs": "22.22secs",
  247.         "2nd Load": "13.14secs",
  248.         "Context (max)s": "8192",
  249.         "Context sent": "109 tokens",
  250.         "VRAM": "7653MB",
  251.         "TDP": "220W",
  252.         "watts inference": "110W",
  253.         "Watts idle(Loaded)": "65W",
  254.         "Watts idle (0B VRAM)": "65W",
  255.         "Notes": "turboderp/Llama-3.1-8B-Instruct-exl2:6.0bpw RISER"
  256.     },
  257.     {
  258.         "GPU": "GTX 1080ti",
  259.         "Tok/s": "22.80 tokens/s",
  260.         "TFLOPS": "11.34 fp32",
  261.         "Format": "GGUF",
  262.         "Cost": "$160",
  263.         "Loading Secs": "23.99secs",
  264.         "2nd Load": "2.89secs",
  265.         "Context (max)s": "8192",
  266.         "Context sent": "109 tokens",
  267.         "VRAM": "9332MB",
  268.         "TDP": "250W",
  269.         "watts inference": "120-200W",
  270.         "Watts idle(Loaded)": "8W",
  271.         "Watts idle (0B VRAM)": "8W",
  272.         "Notes": "RISER"
  273.     },
  274.     {
  275.         "GPU": "CMP 100-210",
  276.         "Tok/s": "25.07 tokens/s",
  277.         "TFLOPS": "11.75 fp32",
  278.         "Format": "GGUF",
  279.         "Cost": "$150",
  280.         "Loading Secs": "39.98secs",
  281.         "2nd Load": "",
  282.         "Context (max)s": "8192",
  283.         "Context sent": "109 tokens",
  284.         "VRAM": "9461MB",
  285.         "TDP": "250W",
  286.         "watts inference": "80-130W",
  287.         "Watts idle(Loaded)": "28W",
  288.         "Watts idle (0B VRAM)": "24W",
  289.         "Notes": "rope_freq_base=0, or coredump"
  290.     },
  291.     {
  292.         "GPU": "CMP 100-210",
  293.         "Tok/s": "40.66 tokens/s",
  294.         "TFLOPS": "23.49 fp16",
  295.         "Format": "EXL2",
  296.         "Cost": "$150",
  297.         "Loading Secs": "41.43secs",
  298.         "2nd Load": "",
  299.         "Context (max)s": "8192",
  300.         "Context sent": "109 tokens",
  301.         "VRAM": "9489MB",
  302.         "TDP": "250W",
  303.         "watts inference": "120-170W",
  304.         "Watts idle(Loaded)": "28W",
  305.         "Watts idle (0B VRAM)": "24W",
  306.         "Notes": "no_flash_attn=true"
  307.     },
  308.     {
  309.         "GPU": "RTX 3070 Q6_K_L",
  310.         "Tok/s": "27.96 tokens/s",
  311.         "TFLOPS": "20.31 fp32",
  312.         "Format": "GGUF",
  313.         "Cost": "$250",
  314.         "Loading Secs": "",
  315.         "2nd Load": "5.15secs",
  316.         "Context (max)s": "8192",
  317.         "Context sent": "109 tokens",
  318.         "VRAM": "7765MB",
  319.         "TDP": "240W",
  320.         "watts inference": "145-165W",
  321.         "Watts idle(Loaded)": "23W",
  322.         "Watts idle (0B VRAM)": "15W",
  323.         "Notes": ""
  324.     },
  325.     {
  326.         "GPU": "RTX 3070 Q6_K_L",
  327.         "Tok/s": "29.63 tokens/s",
  328.         "TFLOPS": "20.31 fp32",
  329.         "Format": "GGUF/FA",
  330.         "Cost": "$250",
  331.         "Loading Secs": "22.4secs",
  332.         "2nd Load": "5.3secs",
  333.         "Context (max)s": "8192",
  334.         "Context sent": "109 tokens",
  335.         "VRAM": "7435MB",
  336.         "TDP": "240W",
  337.         "watts inference": "165-185W",
  338.         "Watts idle(Loaded)": "23W",
  339.         "Watts idle (0B VRAM)": "15W",
  340.         "Notes": ""
  341.     },
  342.     {
  343.         "GPU": "RTX 3070 6bpw",
  344.         "Tok/s": "31.36 tokens/s",
  345.         "TFLOPS": "20.31 fp16",
  346.         "Format": "EXL2",
  347.         "Cost": "$250",
  348.         "Loading Secs": "",
  349.         "2nd Load": "5.17secs",
  350.         "Context (max)s": "8192",
  351.         "Context sent": "109 tokens",
  352.         "VRAM": "7707MiB",
  353.         "TDP": "240W",
  354.         "watts inference": "140-155W",
  355.         "Watts idle(Loaded)": "23W",
  356.         "Watts idle (0B VRAM)": "15W",
  357.         "Notes": ""
  358.     },
  359.     {
  360.         "GPU": "RTX 3070 6bpw",
  361.         "Tok/s": "35.27 tokens/s",
  362.         "TFLOPS": "20.31 fp16",
  363.         "Format": "EXL2/FA",
  364.         "Cost": "$250",
  365.         "Loading Secs": "17.48secs",
  366.         "2nd Load": "5.39secs",
  367.         "Context (max)s": "8192",
  368.         "Context sent": "109 tokens",
  369.         "VRAM": "7707MiB",
  370.         "TDP": "240W",
  371.         "watts inference": "130-145W",
  372.         "Watts idle(Loaded)": "23W",
  373.         "Watts idle (0B VRAM)": "15W",
  374.         "Notes": ""
  375.     },
  376.     {
  377.         "GPU": "Titan V",
  378.         "Tok/s": "37.37 tokens/s",
  379.         "TFLOPS": "14.90 fp32",
  380.         "Format": "GGUF",
  381.         "Cost": "$300",
  382.         "Loading Secs": "23.38secs",
  383.         "2nd Load": "2.53secs",
  384.         "Context (max)s": "8192",
  385.         "Context sent": "109 tokens",
  386.         "VRAM": "9502MB",
  387.         "TDP": "250W",
  388.         "watts inference": "90W-127W",
  389.         "Watts idle(Loaded)": "25W",
  390.         "Watts idle (0B VRAM)": "25W",
  391.         "Notes": "--tensorcores"
  392.     },
  393.     {
  394.         "GPU": "Titan V",
  395.         "Tok/s": "45.65 tokens/s",
  396.         "TFLOPS": "29.80 fp16",
  397.         "Format": "EXL2",
  398.         "Cost": "$300",
  399.         "Loading Secs": "20.75secs",
  400.         "2nd Load": "6.27secs",
  401.         "Context (max)s": "8192",
  402.         "Context sent": "109 tokens",
  403.         "VRAM": "9422MB",
  404.         "TDP": "250W",
  405.         "watts inference": "110-130W",
  406.         "Watts idle(Loaded)": "25W",
  407.         "Watts idle (0B VRAM)": "23W",
  408.         "Notes": "no_flash_attn=true"
  409.     },
  410.     {
  411.         "GPU": "Tesla T4",
  412.         "Tok/s": "19.57 tokens/s",
  413.         "TFLOPS": "8.141 fp32",
  414.         "Format": "GGUF",
  415.         "Cost": "$500",
  416.         "Loading Secs": "23.72secs",
  417.         "2nd Load": "2.24secs",
  418.         "Context (max)s": "8192",
  419.         "Context sent": "109 tokens",
  420.         "VRAM": "9294MB",
  421.         "TDP": "70W",
  422.         "watts inference": "45-50W",
  423.         "Watts idle(Loaded)": "37W",
  424.         "Watts idle (0B VRAM)": "10-27W",
  425.         "Notes": "Card I had bounced between P0 & P8 idle"
  426.     },
  427.     {
  428.         "GPU": "Tesla T4",
  429.         "Tok/s": "23.99 tokens/s",
  430.         "TFLOPS": "65.13 fp16",
  431.         "Format": "EXL2",
  432.         "Cost": "$500",
  433.         "Loading Secs": "27.04secs",
  434.         "2nd Load": "6.63secs",
  435.         "Context (max)s": "8192",
  436.         "Context sent": "109 tokens",
  437.         "VRAM": "9220MB",
  438.         "TDP": "70W",
  439.         "watts inference": "60-70W",
  440.         "Watts idle(Loaded)": "27W",
  441.         "Watts idle (0B VRAM)": "10-27W",
  442.         "Notes": ""
  443.     },
  444.     {
  445.         "GPU": "Titan RTX",
  446.         "Tok/s": "31.62 tokens/s",
  447.         "TFLOPS": "16.31 fp32",
  448.         "Format": "GGUF",
  449.         "Cost": "$700",
  450.         "Loading Secs": "",
  451.         "2nd Load": "2.93secs",
  452.         "Context (max)s": "8192",
  453.         "Context sent": "109 tokens",
  454.         "VRAM": "9358MB",
  455.         "TDP": "280W",
  456.         "watts inference": "180-210W",
  457.         "Watts idle(Loaded)": "15W",
  458.         "Watts idle (0B VRAM)": "15W",
  459.         "Notes": "--tensorcores"
  460.     },
  461.     {
  462.         "GPU": "Titan RTX",
  463.         "Tok/s": "32.56 tokens/s",
  464.         "TFLOPS": "16.31 fp32",
  465.         "Format": "GGUF/FA",
  466.         "Cost": "$700",
  467.         "Loading Secs": "23.78secs",
  468.         "2nd Load": "2.92secs",
  469.         "Context (max)s": "8192",
  470.         "Context sent": "109 tokens",
  471.         "VRAM": "9056MB",
  472.         "TDP": "280W",
  473.         "watts inference": "185-215W",
  474.         "Watts idle(Loaded)": "15W",
  475.         "Watts idle (0B VRAM)": "15W",
  476.         "Notes": "--tensorcores flash_attn=true"
  477.     },
  478.     {
  479.         "GPU": "Titan RTX",
  480.         "Tok/s": "44.15 tokens/s",
  481.         "TFLOPS": "32.62 fp16",
  482.         "Format": "EXL2",
  483.         "Cost": "$700",
  484.         "Loading Secs": "26.58secs",
  485.         "2nd Load": "6.47secs",
  486.         "Context (max)s": "8192",
  487.         "Context sent": "109 tokens",
  488.         "VRAM": "9246MB",
  489.         "TDP": "280W",
  490.         "watts inference": "220-240W",
  491.         "Watts idle(Loaded)": "15W",
  492.         "Watts idle (0B VRAM)": "15W",
  493.         "Notes": "no_flash_attn=true"
  494.     },
  495.     {
  496.         "GPU": "RTX 3090",
  497.         "Tok/s": "35.13 tokens/s",
  498.         "TFLOPS": "35.58 fp32",
  499.         "Format": "GGUF",
  500.         "Cost": "$700",
  501.         "Loading Secs": "24.00secs",
  502.         "2nd Load": "2.89secs",
  503.         "Context (max)s": "8192",
  504.         "Context sent": "109 tokens",
  505.         "VRAM": "9456MB",
  506.         "TDP": "350W",
  507.         "watts inference": "235-260W",
  508.         "Watts idle(Loaded)": "17W",
  509.         "Watts idle (0B VRAM)": "6W",
  510.         "Notes": ""
  511.     },
  512.     {
  513.         "GPU": "RTX 3090",
  514.         "Tok/s": "36.02 tokens/s",
  515.         "TFLOPS": "35.58 fp32",
  516.         "Format": "GGUF/FA",
  517.         "Cost": "$700",
  518.         "Loading Secs": "",
  519.         "2nd Load": "2.82secs",
  520.         "Context (max)s": "8192",
  521.         "Context sent": "109 tokens",
  522.         "VRAM": "9154MB",
  523.         "TDP": "350W",
  524.         "watts inference": "260-265W",
  525.         "Watts idle(Loaded)": "17W",
  526.         "Watts idle (0B VRAM)": "6W",
  527.         "Notes": ""
  528.     },
  529.     {
  530.         "GPU": "RTX 3090",
  531.         "Tok/s": "49.11 tokens/s",
  532.         "TFLOPS": "35.58 fp16",
  533.         "Format": "EXL2",
  534.         "Cost": "$700",
  535.         "Loading Secs": "26.14secs",
  536.         "2nd Load": "7.63secs",
  537.         "Context (max)s": "8192",
  538.         "Context sent": "109 tokens",
  539.         "VRAM": "9360MB",
  540.         "TDP": "350W",
  541.         "watts inference": "270-315W",
  542.         "Watts idle(Loaded)": "17W",
  543.         "Watts idle (0B VRAM)": "6W",
  544.         "Notes": ""
  545.     },
  546.     {
  547.         "GPU": "RTX 3090",
  548.         "Tok/s": "54.75 tokens/s",
  549.         "TFLOPS": "35.58 fp16",
  550.         "Format": "EXL2/FA",
  551.         "Cost": "$700",
  552.         "Loading Secs": "",
  553.         "2nd Load": "7.37secs",
  554.         "Context (max)s": "8192",
  555.         "Context sent": "109 tokens",
  556.         "VRAM": "9360MB",
  557.         "TDP": "350W",
  558.         "watts inference": "285-310W",
  559.         "Watts idle(Loaded)": "17W",
  560.         "Watts idle (0B VRAM)": "6W",
  561.         "Notes": ""
  562.     }
  563. ]
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement