-rwxr-xr-x 11250 lib25519-20241004/autogen/speed raw
#!/usr/bin/env python3
# Fixed prelude of the generated C program.  Everything between the triple
# quotes is C source text, not Python.  It is a raw string, so backslash
# sequences such as the \n inside printf formats reach the C file unchanged.
# The rest of this script appends to `output` and finally writes it to
# command/lib25519-speed.c.
#
# The prelude provides:
#   targeto/targetp/targeti  filter strings, initially null; main() sets them
#                            from the command-line arguments
#   callocplus(len)          calloc of len+128 bytes; aborts on failure
#   aligned(x)               x advanced to the next 64-byte boundary
#                            (unchanged if x is already a multiple of 64)
#   longlong_sort(x,n)       in-place compare-exchange sort of n long longs
#   t[] and t_print()        TIMINGS+1 cycle-counter samples and their report:
#                            the consecutive differences, a rounded mean of
#                            the middle half of the sorted differences, and
#                            each difference relative to that mean
#   measure_cpucycles()      timings of back-to-back cpucycles() calls
#   measure_randombytes()    timings of randombytes() for growing lengths
output = r'''/* WARNING: auto-generated (by autogen/speed); do not edit */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include <time.h>
#include <sys/time.h>
#include <sys/types.h>
#include <sys/resource.h>
#include <cpucycles.h> /* -lcpucycles */
#include <lib25519.h> /* -l25519 */
#include <randombytes.h> /* -lrandombytes */
static const char *targeto = 0;
static const char *targetp = 0;
static const char *targeti = 0;
#include "limits.inc"
static void *callocplus(long long len)
{
void *x = calloc(1,len + 128);
if (!x) abort();
return x;
}
static void *aligned(void *x)
{
unsigned char *y = x;
y += 63 & (-(unsigned long) x);
return y;
}
static void longlong_sort(long long *x,long long n)
{
long long top,p,q,r,i;
if (n < 2) return;
top = 1;
while (top < n - top) top += top;
for (p = top;p > 0;p >>= 1) {
for (i = 0;i < n - p;++i)
if (!(i & p))
if (x[i] > x[i+p]) {
long long t = x[i];
x[i] = x[i+p];
x[i+p] = t;
}
i = 0;
for (q = top;q > p;q >>= 1) {
for (;i < n - q;++i) {
if (!(i & p)) {
long long a = x[i + p];
for (r = q;r > p;r >>= 1)
if (a > x[i+r]) {
long long t = a;
a = x[i+r];
x[i+r] = t;
}
x[i + p] = a;
}
}
}
}
}
#define TIMINGS 32 // must be multiple of 4
static long long t[TIMINGS+1];
static void t_print(const char *op,long long impl,long long len)
{
long long tsort[TIMINGS];
long long iqm = 0;
printf("%s",op);
if (impl >= 0)
printf(" %lld",impl);
else
printf(" selected");
printf(" %lld",len);
for (long long i = 0;i < TIMINGS;++i)
tsort[i] = t[i] = t[i+1]-t[i];
longlong_sort(tsort,TIMINGS);
for (long long j = TIMINGS/4;j < 3*TIMINGS/4;++j)
iqm += tsort[j];
iqm *= 2;
iqm += TIMINGS/2;
iqm /= TIMINGS;
printf(" %lld ",iqm);
for (long long i = 0;i < TIMINGS;++i)
printf("%+lld",t[i]-iqm);
printf("\n");
fflush(stdout);
}
#define MAXBATCH 16
#define MAXTEST_BYTES 65536
static void measure_cpucycles(void)
{
printf("cpucycles selected persecond %lld\n",cpucycles_persecond());
printf("cpucycles selected implementation %s\n",cpucycles_implementation());
for (long long i = 0;i <= TIMINGS;++i)
t[i] = cpucycles();
t_print("cpucycles",-1,0);
}
static void measure_randombytes(void)
{
void *mstorage = callocplus(MAXTEST_BYTES);
unsigned char *m = aligned(mstorage);
long long mlen = 0;
while (mlen < MAXTEST_BYTES) {
for (long long i = 0;i <= TIMINGS;++i) {
t[i] = cpucycles();
randombytes(m,mlen);
}
t_print("randombytes",-1,mlen);
mlen += 1+mlen/2;
}
free(mstorage);
}
'''
# XXX: integrate todo into api
#
# Benchmark plan: one entry per operation, in the order in which the
# measure functions are generated and called.  Each entry is
#   (operation, variables, benches)
# variables: (name, size) pairs.  A string size is a C expression for a byte
#   count; the generator allocates an aligned buffer of that size, emits a
#   randombytes() call to fill it, and rewrites 'lib25519_<operation>' inside
#   the expression to 'lib25519_<operation>_<primitive>'.  A size of None
#   declares a plain `long long` instead of a buffer.
# benches: (function, arg, ...) tuples emitted as C calls, in order.  The
#   first call to each function is timed; a function that appears again is
#   emitted as an ordinary untimed call.
todo = (
# constant-size operations: timed once per implementation
('verify',(
('x','lib25519_verify_BYTES'),
('y','lib25519_verify_BYTES'),
),(
('crypto_verify','x','y'),
)),
# operations taking mlen: timed over a range of message lengths
('hashblocks',(
('h','lib25519_hashblocks_STATEBYTES'),
('m','MAXTEST_BYTES'),
('mlen',None),
),(
('crypto_hashblocks','h','m','mlen'),
)),
('hash',(
('h','lib25519_hash_BYTES'),
('m','MAXTEST_BYTES'),
('mlen',None),
),(
('crypto_hash','h','m','mlen'),
)),
('pow',(
('n','lib25519_pow_BYTES'),
('ne','lib25519_pow_BYTES'),
),(
('crypto_pow','ne','n'),
)),
# batch operations: timed for every batch size from 0 through MAXBATCH
('powbatch',(
('n','MAXBATCH*lib25519_powbatch_BYTES'),
('ne','MAXBATCH*lib25519_powbatch_BYTES'),
),(
('crypto_powbatch','ne','n','batch'),
)),
('nP',(
('n','lib25519_nP_SCALARBYTES'),
('P','lib25519_nP_POINTBYTES'),
('nP','lib25519_nP_POINTBYTES'),
),(
('crypto_nP','nP','n','P'),
)),
('nPbatch',(
('n','MAXBATCH*lib25519_nPbatch_SCALARBYTES'),
('P','MAXBATCH*lib25519_nPbatch_POINTBYTES'),
('nP','MAXBATCH*lib25519_nPbatch_POINTBYTES'),
),(
('crypto_nPbatch','nP','n','P','batch'),
)),
('nG',(
('n','lib25519_nG_SCALARBYTES'),
('nG','lib25519_nG_POINTBYTES'),
),(
('crypto_nG','nG','n'),
)),
('mGnP',(
('mGnP','lib25519_mGnP_OUTPUTBYTES'),
('m','lib25519_mGnP_MBYTES'),
('n','lib25519_mGnP_NBYTES'),
('P','lib25519_mGnP_PBYTES'),
),(
('crypto_mGnP','mGnP','m','n','P'),
)),
('multiscalar',(
('Q','MAXBATCH*lib25519_multiscalar_OUTPUTBYTES'),
('n','MAXBATCH*lib25519_multiscalar_SCALARBYTES'),
('P','MAXBATCH*lib25519_multiscalar_POINTBYTES'),
),(
('crypto_multiscalar','Q','n','P','batch'),
)),
('dh',(
('pka','lib25519_dh_PUBLICKEYBYTES'),
('ska','lib25519_dh_SECRETKEYBYTES'),
('pkb','lib25519_dh_PUBLICKEYBYTES'),
('skb','lib25519_dh_SECRETKEYBYTES'),
('ka','lib25519_dh_BYTES'),
),(
# the second keypair call repeats the function, so it runs untimed
('crypto_dh_keypair','pka','ska'),
('crypto_dh_keypair','pkb','skb'),
('crypto_dh','ka','pkb','ska'),
)),
('sign',(
('pk','lib25519_sign_PUBLICKEYBYTES'),
('sk','lib25519_sign_SECRETKEYBYTES'),
('m','MAXTEST_BYTES+lib25519_sign_BYTES'),
('sm','MAXTEST_BYTES+lib25519_sign_BYTES'),
('m2','MAXTEST_BYTES+lib25519_sign_BYTES'),
('mlen',None),
('smlen',None),
('m2len',None),
),(
('crypto_sign_keypair','pk','sk'),
('crypto_sign','sm','&smlen','m','mlen','sk'),
('crypto_sign_open','m2','&m2len','sm','smlen','pk'),
)),
)
# Tables filled in from the `api` file in the current directory.
operations = []   # operation names, in order of first appearance
primitives = {}   # operation -> list of primitive names
sizes = {}        # (operation, primitive) -> concatenated '#define' lines
exports = {}      # operation -> function names with the 'crypto_' prefix removed
prototypes = {}   # operation -> list of (return type, name pieces, argument text)

with open('api') as apifile:
    for entry in apifile:
        entry = entry.strip()

        if entry.startswith('crypto_'):
            # "crypto_<operation>/<primitive> ..." introduces a primitive.
            parts = entry.split()[0].split('/')
            assert len(parts) == 2
            opname = parts[0].split('_')[1]
            if opname not in operations:
                operations.append(opname)
            primitives.setdefault(opname, []).append(parts[1])

        elif entry.startswith('#define '):
            # "#define crypto_<operation>_<primitive>_<NAME> ..." size macro.
            macro = entry.split(' ')[1].split('_')
            assert len(macro) == 4
            assert macro[0] == 'crypto'
            key = (macro[1], macro[2])
            sizes[key] = sizes.get(key, '') + entry + '\n'

        elif entry.endswith(');'):
            # "<rettype> crypto_<operation>[_<suffix>](<args>);" prototype.
            head, arglist = entry[:-2].split('(')
            rtype, cname = head.split()
            pieces = cname.split('_')
            opname = pieces[1]
            assert pieces[0] == 'crypto'
            exports.setdefault(opname, []).append('_'.join(pieces[1:]))
            prototypes.setdefault(opname, []).append((rtype, pieces, arglist))
# Emit one static measure_<operation>_<primitive>() C function for every
# primitive of every operation listed in todo.  The pieces of each function
# are collected in a list and appended to `output` in one step.
for opname, buffers, calls in todo:
    for prim in primitives[opname]:
        generic = 'lib25519_' + opname
        specific = 'lib25519_' + opname + '_' + prim
        code = []

        # Function header and the operation/primitive command-line filters.
        code.append('\n')
        code.append('static void measure_%s_%s(void)\n' % (opname, prim))
        code.append('{\n')
        code.append(' if (targeto && strcmp(targeto,"%s")) return;\n' % opname)
        code.append(' if (targetp && strcmp(targetp,"%s")) return;\n' % prim)

        # Declarations: a long long for size-less variables, otherwise an
        # over-allocated buffer plus an aligned pointer into it.
        bufsize = {}
        for name, nbytes in buffers:
            if nbytes is None:
                code.append(' long long %s;\n' % name)
                continue
            nbytes = nbytes.replace(generic, specific)
            code.append(' void *%sstorage = callocplus(%s);\n' % (name, nbytes))
            code.append(' unsigned char *%s = aligned(%sstorage);\n' % (name, name))
            bufsize[name] = nbytes
        code.append('\n')

        # Loop over implementations; impl == -1 stands for the selected one.
        code.append(' for (long long impl = -1;impl < lib25519_numimpl_%s_%s();++impl) {\n' % (opname, prim))
        for rtype, pieces, arglist in prototypes[opname]:
            code.append(' %s (*%s)(%s);\n' % (rtype, '_'.join(pieces), arglist))
        # NOTE(review): this filter is emitted ahead of the impl >= 0 test, so
        # the generated code passes impl == -1 to ..._implementation() when
        # targeti is set -- confirm the dispatch function accepts that.
        code.append(' if (targeti && strcmp(targeti,lib25519_dispatch_%s_%s_implementation(impl))) continue;\n' % (opname, prim))

        # Bind the crypto_* function pointers to a specific implementation ...
        code.append(' if (impl >= 0) {\n')
        for rtype, pieces, arglist in prototypes[opname]:
            bound = '_'.join(['lib25519', 'dispatch', opname, prim] + pieces[2:])
            code.append(' %s = %s(impl);\n' % ('_'.join(pieces), bound))
        code.append(r' printf("%s_%s %%lld implementation %%s compiler %%s\n",impl,lib25519_dispatch_%s_%s_implementation(impl),lib25519_dispatch_%s_%s_compiler(impl));' % ((opname, prim) * 3))
        code.append('\n')
        # ... or to the implementation the library selected.
        code.append(' } else {\n')
        for rtype, pieces, arglist in prototypes[opname]:
            bound = '_'.join(['lib25519', opname, prim] + pieces[2:])
            code.append(' %s = %s;\n' % ('_'.join(pieces), bound))
        code.append(r' printf("%s_%s selected implementation %%s compiler %%s\n",lib25519_%s_%s_implementation(),lib25519_%s_%s_compiler());' % ((opname, prim) * 3))
        code.append('\n')
        code.append(' }\n')

        # Fill every buffer with random bytes.
        for name, nbytes in buffers:
            if nbytes is not None:
                code.append(' randombytes(%s,%s);\n' % (name, nbytes.replace(generic, specific)))

        # Names in `timed` are emitted as plain calls; everything else is
        # timed on first use and then added to the set.
        timed = {'assert'}
        for call in calls:
            cfun = call[0]
            cargs = call[1:]
            invocation = ' %s(%s);\n' % (cfun, ','.join(cargs))
            if cfun in timed:
                code.append(invocation)
                continue
            label = '_'.join([opname, prim] + cfun.split('_')[2:])
            timed.add(cfun)
            # TIMINGS+1 cycle-counter samples around TIMINGS+1 calls.
            timing_loop = [
                ' for (long long i = 0;i <= TIMINGS;++i) {\n',
                ' t[i] = cpucycles();\n',
                invocation,
                ' }\n',
            ]

            if 'mlen' in cargs or 'smlen' in cargs:
                # Variable-length input: sweep mlen from 0 up to MAXTEST_BYTES.
                verifies = (label == 'sign_ed25519_open')  # XXX: put this into todo
                code.append(' mlen = 0;\n')
                code.append(' while (mlen <= MAXTEST_BYTES) {\n')
                code.append(' randombytes(m,mlen);\n')
                if verifies:
                    code.append(' lib25519_sign(sm,&smlen,m,mlen,sk);\n')
                code.extend(timing_loop)
                code.append(' t_print("%s",impl,mlen);\n' % label)
                if verifies:
                    code.append(' /* this is, in principle, not a test program */\n')
                    code.append(' /* but some checks here help validate the data flow above */\n')
                    code.append(' assert(m2len == mlen);\n')
                    code.append(' assert(!memcmp(m,m2,mlen));\n')
                # XXX: put this into todo
                code.append(' mlen += 1+mlen/4;\n' if opname == 'sign' else ' mlen += 1+mlen/2;\n')
                code.append(' }\n')
            elif 'batch' in opname or 'multiscalar' in opname:
                # Batched input: one report per batch size 0..MAXBATCH.
                code.append(' for (long long batch = 0;batch <= MAXBATCH;++batch) {\n')
                code.extend(timing_loop)
                code.append(' t_print("%s",impl,batch);\n' % label)
                code.append(' }\n')
            else:
                # Fixed-size input: report the size of the first argument.
                code.extend(timing_loop)
                code.append(' t_print("%s",impl,%s);\n' % (label, bufsize[call[1]]))

        # Close the implementation loop, release buffers in reverse order.
        code.append(' }\n')
        for name, nbytes in reversed(buffers):
            if nbytes is not None:
                code.append(' free(%sstorage);\n' % name)
        code.append('}\n')

        output += ''.join(code)
# Finish the generated program: main() prints version/arch/cpuid information,
# reads up to three optional filter arguments, then runs every measurement.
output += r'''
#include "print_cpuid.inc"
int main(int argc,char **argv)
{
printf("lib25519 version %s\n",lib25519_version);
printf("lib25519 arch %s\n",lib25519_arch);
print_cpuid();
if (*argv) ++argv;
if (*argv) {
targeto = *argv++;
if (*argv) {
targetp = *argv++;
if (*argv) {
targeti = *argv++;
}
}
}
measure_cpucycles();
measure_randombytes();
limits();
'''

# One call per generated measure function, in todo order.
output += ''.join(
    ' measure_%s_%s();\n' % (opname, prim)
    for opname, _buffers, _calls in todo
    for prim in primitives[opname]
)

output += r'''
return 0;
}
'''

# Write the complete C source next to the other command sources.
with open('command/lib25519-speed.c','w') as cfile:
    cfile.write(output)