tdfonline.com.ar - c99shell

!C99Shell v. 2.1 [PHP 8 Update] [02.02.2022]!
Software: Apache/2.4.53 (Unix) OpenSSL/1.1.1o PHP/7.4.29 mod_perl/2.0.12 Perl/v5.34.1. PHP/7.4.29 uname -a: Linux vps-2738122-x 4.15.0-213-generic #224-Ubuntu SMP Mon Jun 19 13:30:12 UTC 2023 x86_64 uid=1(daemon) gid=1(daemon) grupos=1(daemon) Safe-mode: OFF (not secure) /opt/apex_tdfonline/proyectos/tdfonline/www/docs/openssl/crypto/ec/asm/ drwxr-xr-x Free 13.43 GB of 61.93 GB (21.69%) Encoder Tools Proc. FTP brute Sec. SQL PHP-code Update Feedback Self remove Logout

#! /usr/bin/env perl
# Copyright 2018-2020 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the Apache License 2.0 (the "License").  You may not use
# this file except in compliance with the License.  You can obtain a copy
# in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html
#
# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================
#
# X25519 lower-level primitives for PPC64.
#
# July 2018.
#
# Base 2^64 is faster than base 2^51 on pre-POWER8, most notably ~15%
# faster on PPC970/G5. POWER8, on the other hand, seems to trip on its own
# shoelaces when handling longer carry chains. As base 2^51 has just
# single-carry pairs, it's 25% faster than base 2^64. Since PPC970 is
# pretty old, the base 2^64 implementation is not engaged. Comparison to
# compiler-generated code is complicated by the fact that not all
# compilers support 128-bit integers. When the compiler doesn't, like xlc,
# this module delivers more than 2x improvement, and when it does,
# from 12% to 30% improvement was measured...

# Argument handling:
#   - the trailing argument becomes $output when it ends in an extension;
#   - the leading argument becomes $flavour when it contains no dot.
$output  = (@ARGV && $ARGV[-1] =~ m|\.\w+$|) ? pop   : undef;
$flavour = (@ARGV && $ARGV[0]  !~ m|\.|)     ? shift : undef;

# Directory part of this script's own path, trailing separator included.
$0 =~ m/(.*[\/\\])[^\/\\]+$/;
$dir = $1;

# Look for the translator next to this script first, then in ../../perlasm.
$xlate = "${dir}ppc-xlate.pl";
if (!-f $xlate) {
    $xlate = "${dir}../../perlasm/ppc-xlate.pl";
    die "can't locate ppc-xlate.pl" unless -f $xlate;
}

# Everything printed to STDOUT from here on is piped through the translator.
open(OUT, "| \"$^X\" $xlate $flavour \"$output\"")
    || die "can't call $xlate: $!";
*STDOUT = *OUT;

# Stack pointer, then the result pointer and the two operand pointers
# used by the generated routines.
my $sp = "r1";
my ($rp, $ap, $bp) = ("r3", "r4", "r5");

####################################################### base 2^64
# Base 2^64 code path.  The constant-false condition keeps these routines
# out of the generated output.
if (0) {
# Register names: $bi,$a0-$a3,$t0,$t1 are r6-r12; $t2,$t3 are r22,r23;
# $acc0-$acc7 are r24-r31 (r22-r31 are saved by the mul/sqr prologues).
my ($bi,$a0,$a1,$a2,$a3,$t0,$t1, $t2,$t3,
    $acc0,$acc1,$acc2,$acc3,$acc4,$acc5,$acc6,$acc7) =
    map("r$_",(6..12,22..31));
my $zero = "r0";        # cleared with xor wherever a zero operand is needed
my $FRAME = 16*8;       # stack frame size in bytes

# x25519_fe64_mul: prologue and the first row, a[0..3]*b[0].
$code.=<<___;
.text

.globl    x25519_fe64_mul
.type    x25519_fe64_mul,\@function
.align    5
x25519_fe64_mul:
    stdu    $sp,-$FRAME($sp)
    std    r22,`$FRAME-8*10`($sp)
    std    r23,`$FRAME-8*9`($sp)
    std    r24,`$FRAME-8*8`($sp)
    std    r25,`$FRAME-8*7`($sp)
    std    r26,`$FRAME-8*6`($sp)
    std    r27,`$FRAME-8*5`($sp)
    std    r28,`$FRAME-8*4`($sp)
    std    r29,`$FRAME-8*3`($sp)
    std    r30,`$FRAME-8*2`($sp)
    std    r31,`$FRAME-8*1`($sp)

    ld    $bi,0($bp)
    ld    $a0,0($ap)
    xor    $zero,$zero,$zero
    ld    $a1,8($ap)
    ld    $a2,16($ap)
    ld    $a3,24($ap)

    mulld    $acc0,$a0,$bi        # a[0]*b[0]
    mulhdu    $t0,$a0,$bi
    mulld    $acc1,$a1,$bi        # a[1]*b[0]
    mulhdu    $t1,$a1,$bi
    mulld    $acc2,$a2,$bi        # a[2]*b[0]
    mulhdu    $t2,$a2,$bi
    mulld    $acc3,$a3,$bi        # a[3]*b[0]
    mulhdu    $t3,$a3,$bi
___
# Rows for b[1]..b[3].  @acc slides by one register per row, so @acc[1..4]
# always name the accumulators the current row adds into.
for(my @acc=($acc0,$acc1,$acc2,$acc3,$acc4,$acc5,$acc6,$acc7),
    my $i=1; $i<4; shift(@acc), $i++) {
# On the first row the top accumulator has not been written yet, so the
# zero register is used as its source instead.
my $acc4 = $i==1? $zero : $acc[4];

$code.=<<___;
    ld    $bi,`8*$i`($bp)
    addc    @acc[1],@acc[1],$t0    # accumulate high parts
    mulld    $t0,$a0,$bi
    adde    @acc[2],@acc[2],$t1
    mulld    $t1,$a1,$bi
    adde    @acc[3],@acc[3],$t2
    mulld    $t2,$a2,$bi
    adde    @acc[4],$acc4,$t3
    mulld    $t3,$a3,$bi
    addc    @acc[1],@acc[1],$t0    # accumulate low parts
    mulhdu    $t0,$a0,$bi
    adde    @acc[2],@acc[2],$t1
    mulhdu    $t1,$a1,$bi
    adde    @acc[3],@acc[3],$t2
    mulhdu    $t2,$a2,$bi
    adde    @acc[4],@acc[4],$t3
    mulhdu    $t3,$a3,$bi
    adde    @acc[5],$zero,$zero
___
}
# Reduction tail of fe64_mul, followed by fe64_sqr, fe64_mul121666,
# fe64_add, fe64_sub and the head of fe64_tobytes.
$code.=<<___;
    li    $bi,38
    addc    $acc4,$acc4,$t0
    mulld    $t0,$acc4,$bi
    adde    $acc5,$acc5,$t1
    mulld    $t1,$acc5,$bi
    adde    $acc6,$acc6,$t2
    mulld    $t2,$acc6,$bi
    adde    $acc7,$acc7,$t3
    mulld    $t3,$acc7,$bi

    addc    $acc0,$acc0,$t0
    mulhdu    $t0,$acc4,$bi
    adde    $acc1,$acc1,$t1
    mulhdu    $t1,$acc5,$bi
    adde    $acc2,$acc2,$t2
    mulhdu    $t2,$acc6,$bi
    adde    $acc3,$acc3,$t3
    mulhdu    $t3,$acc7,$bi
    adde    $acc4,$zero,$zero

    addc    $acc1,$acc1,$t0
    adde    $acc2,$acc2,$t1
    adde    $acc3,$acc3,$t2
    adde    $acc4,$acc4,$t3

    mulld    $acc4,$acc4,$bi

    addc    $acc0,$acc0,$acc4
    addze    $acc1,$acc1
    addze    $acc2,$acc2
    addze    $acc3,$acc3

    subfe    $acc4,$acc4,$acc4    # carry -> ~mask
    std    $acc1,8($rp)
    andc    $acc4,$bi,$acc4
    std    $acc2,16($rp)
    add    $acc0,$acc0,$acc4
    std    $acc3,24($rp)
    std    $acc0,0($rp)

    ld    r22,`$FRAME-8*10`($sp)
    ld    r23,`$FRAME-8*9`($sp)
    ld    r24,`$FRAME-8*8`($sp)
    ld    r25,`$FRAME-8*7`($sp)
    ld    r26,`$FRAME-8*6`($sp)
    ld    r27,`$FRAME-8*5`($sp)
    ld    r28,`$FRAME-8*4`($sp)
    ld    r29,`$FRAME-8*3`($sp)
    ld    r30,`$FRAME-8*2`($sp)
    ld    r31,`$FRAME-8*1`($sp)
    addi    $sp,$sp,$FRAME
    blr
    .long    0
    .byte    0,12,4,0,0x80,10,3,0
    .long    0
.size    x25519_fe64_mul,.-x25519_fe64_mul

.globl    x25519_fe64_sqr
.type    x25519_fe64_sqr,\@function
.align    5
x25519_fe64_sqr:
    stdu    $sp,-$FRAME($sp)
    std    r22,`$FRAME-8*10`($sp)
    std    r23,`$FRAME-8*9`($sp)
    std    r24,`$FRAME-8*8`($sp)
    std    r25,`$FRAME-8*7`($sp)
    std    r26,`$FRAME-8*6`($sp)
    std    r27,`$FRAME-8*5`($sp)
    std    r28,`$FRAME-8*4`($sp)
    std    r29,`$FRAME-8*3`($sp)
    std    r30,`$FRAME-8*2`($sp)
    std    r31,`$FRAME-8*1`($sp)

    ld    $a0,0($ap)
    xor    $zero,$zero,$zero
    ld    $a1,8($ap)
    ld    $a2,16($ap)
    ld    $a3,24($ap)

    ################################
    #  |  |  |  |  |  |a1*a0|  |
    #  |  |  |  |  |a2*a0|  |  |
    #  |  |a3*a2|a3*a0|  |  |  |
    #  |  |  |  |a2*a1|  |  |  |
    #  |  |  |a3*a1|  |  |  |  |
    # *|  |  |  |  |  |  |  | 2|
    # +|a3*a3|a2*a2|a1*a1|a0*a0|
    #  |--+--+--+--+--+--+--+--|
    #  |A7|A6|A5|A4|A3|A2|A1|A0|, where Ax is $accx, i.e. follow $accx
    #
    #  "can't overflow" below mark carrying into high part of
    #  multiplication result, which can't overflow, because it
    #  can never be all ones.

    mulld    $acc1,$a1,$a0        # a[1]*a[0]
    mulhdu    $t1,$a1,$a0
    mulld    $acc2,$a2,$a0        # a[2]*a[0]
    mulhdu    $t2,$a2,$a0
    mulld    $acc3,$a3,$a0        # a[3]*a[0]
    mulhdu    $acc4,$a3,$a0

    addc    $acc2,$acc2,$t1        # accumulate high parts of multiplication
     mulld    $t0,$a2,$a1        # a[2]*a[1]
     mulhdu    $t1,$a2,$a1
    adde    $acc3,$acc3,$t2
     mulld    $t2,$a3,$a1        # a[3]*a[1]
     mulhdu    $t3,$a3,$a1
    addze    $acc4,$acc4        # can't overflow

    mulld    $acc5,$a3,$a2        # a[3]*a[2]
    mulhdu    $acc6,$a3,$a2

    addc    $t1,$t1,$t2        # accumulate high parts of multiplication
     mulld    $acc0,$a0,$a0        # a[0]*a[0]
    addze    $t2,$t3            # can't overflow

    addc    $acc3,$acc3,$t0        # accumulate low parts of multiplication
     mulhdu    $a0,$a0,$a0
    adde    $acc4,$acc4,$t1
     mulld    $t1,$a1,$a1        # a[1]*a[1]
    adde    $acc5,$acc5,$t2
     mulhdu    $a1,$a1,$a1
    addze    $acc6,$acc6        # can't overflow

    addc    $acc1,$acc1,$acc1    # acc[1-6]*=2
     mulld    $t2,$a2,$a2        # a[2]*a[2]
    adde    $acc2,$acc2,$acc2
     mulhdu    $a2,$a2,$a2
    adde    $acc3,$acc3,$acc3
     mulld    $t3,$a3,$a3        # a[3]*a[3]
    adde    $acc4,$acc4,$acc4
     mulhdu    $a3,$a3,$a3
    adde    $acc5,$acc5,$acc5
    adde    $acc6,$acc6,$acc6
    addze    $acc7,$zero

    addc    $acc1,$acc1,$a0        # +a[i]*a[i]
     li    $bi,38
    adde    $acc2,$acc2,$t1
    adde    $acc3,$acc3,$a1
    adde    $acc4,$acc4,$t2
    adde    $acc5,$acc5,$a2
    adde    $acc6,$acc6,$t3
    adde    $acc7,$acc7,$a3

    mulld    $t0,$acc4,$bi
    mulld    $t1,$acc5,$bi
    mulld    $t2,$acc6,$bi
    mulld    $t3,$acc7,$bi

    addc    $acc0,$acc0,$t0
    mulhdu    $t0,$acc4,$bi
    adde    $acc1,$acc1,$t1
    mulhdu    $t1,$acc5,$bi
    adde    $acc2,$acc2,$t2
    mulhdu    $t2,$acc6,$bi
    adde    $acc3,$acc3,$t3
    mulhdu    $t3,$acc7,$bi
    addze    $acc4,$zero

    addc    $acc1,$acc1,$t0
    adde    $acc2,$acc2,$t1
    adde    $acc3,$acc3,$t2
    adde    $acc4,$acc4,$t3

    mulld    $acc4,$acc4,$bi

    addc    $acc0,$acc0,$acc4
    addze    $acc1,$acc1
    addze    $acc2,$acc2
    addze    $acc3,$acc3

    subfe    $acc4,$acc4,$acc4    # carry -> ~mask
    std    $acc1,8($rp)
    andc    $acc4,$bi,$acc4
    std    $acc2,16($rp)
    add    $acc0,$acc0,$acc4
    std    $acc3,24($rp)
    std    $acc0,0($rp)

    ld    r22,`$FRAME-8*10`($sp)
    ld    r23,`$FRAME-8*9`($sp)
    ld    r24,`$FRAME-8*8`($sp)
    ld    r25,`$FRAME-8*7`($sp)
    ld    r26,`$FRAME-8*6`($sp)
    ld    r27,`$FRAME-8*5`($sp)
    ld    r28,`$FRAME-8*4`($sp)
    ld    r29,`$FRAME-8*3`($sp)
    ld    r30,`$FRAME-8*2`($sp)
    ld    r31,`$FRAME-8*1`($sp)
    addi    $sp,$sp,$FRAME
    blr
    .long    0
    .byte    0,12,4,0,0x80,10,2,0
    .long    0
.size    x25519_fe64_sqr,.-x25519_fe64_sqr

.globl    x25519_fe64_mul121666
.type    x25519_fe64_mul121666,\@function
.align    5
x25519_fe64_mul121666:
    lis    $bi,`65536>>16`
    ori    $bi,$bi,`121666-65536`

    ld    $t0,0($ap)
    ld    $t1,8($ap)
    ld    $bp,16($ap)
    ld    $ap,24($ap)

    mulld    $a0,$t0,$bi
    mulhdu    $t0,$t0,$bi
    mulld    $a1,$t1,$bi
    mulhdu    $t1,$t1,$bi
    mulld    $a2,$bp,$bi
    mulhdu    $bp,$bp,$bi
    mulld    $a3,$ap,$bi
    mulhdu    $ap,$ap,$bi

    addc    $a1,$a1,$t0
    adde    $a2,$a2,$t1
    adde    $a3,$a3,$bp
    addze    $ap,    $ap

    mulli    $ap,$ap,38
    li    $t0,38            # 2^256 mod (2^255-19), for the final carry

    addc    $a0,$a0,$ap
    addze    $a1,$a1
    addze    $a2,$a2
    addze    $a3,$a3

    subfe    $t1,$t1,$t1        # carry -> ~mask
    std    $a1,8($rp)
    andc    $t0,$t0,$t1
    std    $a2,16($rp)
    add    $a0,$a0,$t0
    std    $a3,24($rp)
    std    $a0,0($rp)

    blr
    .long    0
    .byte    0,12,0x14,0,0,0,2,0
    .long    0
.size    x25519_fe64_mul121666,.-x25519_fe64_mul121666

.globl    x25519_fe64_add
.type    x25519_fe64_add,\@function
.align    5
x25519_fe64_add:
    ld    $a0,0($ap)
    ld    $t0,0($bp)
    ld    $a1,8($ap)
    ld    $t1,8($bp)
    ld    $a2,16($ap)
    ld    $bi,16($bp)
    ld    $a3,24($ap)
    ld    $bp,24($bp)

    addc    $a0,$a0,$t0
    adde    $a1,$a1,$t1
    adde    $a2,$a2,$bi
    adde    $a3,$a3,$bp

    li    $t0,38
    subfe    $t1,$t1,$t1        # carry -> ~mask
    andc    $t1,$t0,$t1

    addc    $a0,$a0,$t1
    addze    $a1,$a1
    addze    $a2,$a2
    addze    $a3,$a3

    subfe    $t1,$t1,$t1        # carry -> ~mask
    std    $a1,8($rp)
    andc    $t0,$t0,$t1
    std    $a2,16($rp)
    add    $a0,$a0,$t0
    std    $a3,24($rp)
    std    $a0,0($rp)

    blr
    .long    0
    .byte    0,12,0x14,0,0,0,3,0
    .long    0
.size    x25519_fe64_add,.-x25519_fe64_add

.globl    x25519_fe64_sub
.type    x25519_fe64_sub,\@function
.align    5
x25519_fe64_sub:
    ld    $a0,0($ap)
    ld    $t0,0($bp)
    ld    $a1,8($ap)
    ld    $t1,8($bp)
    ld    $a2,16($ap)
    ld    $bi,16($bp)
    ld    $a3,24($ap)
    ld    $bp,24($bp)

    subfc    $a0,$t0,$a0
    subfe    $a1,$t1,$a1
    subfe    $a2,$bi,$a2
    subfe    $a3,$bp,$a3

    li    $t0,38
    subfe    $t1,$t1,$t1        # borrow -> mask
    xor    $zero,$zero,$zero
    and    $t1,$t0,$t1

    subfc    $a0,$t1,$a0
    subfe    $a1,$zero,$a1
    subfe    $a2,$zero,$a2
    subfe    $a3,$zero,$a3

    subfe    $t1,$t1,$t1        # borrow -> mask
    std    $a1,8($rp)
    and    $t0,$t0,$t1
    std    $a2,16($rp)
    subf    $a0,$t0,$a0
    std    $a3,24($rp)
    std    $a0,0($rp)

    blr
    .long    0
    .byte    0,12,0x14,0,0,0,3,0
    .long    0
.size    x25519_fe64_sub,.-x25519_fe64_sub

.globl    x25519_fe64_tobytes
.type    x25519_fe64_tobytes,\@function
.align    5
x25519_fe64_tobytes:
    ld    $a3,24($ap)
    ld    $a0,0($ap)
    ld    $a1,8($ap)
    ld    $a2,16($ap)

    sradi    $t0,$a3,63        # most significant bit -> mask
    li    $t1,19
    and    $t0,$t0,$t1
    sldi    $a3,$a3,1
    add    $t0,$t0,$t1        # compare to modulus in the same go
    srdi    $a3,$a3,1        # most significant bit cleared

    addc    $a0,$a0,$t0
    addze    $a1,$a1
    addze    $a2,$a2
    addze    $a3,$a3

    xor    $zero,$zero,$zero
    sradi    $t0,$a3,63        # most significant bit -> mask
    sldi    $a3,$a3,1
    andc    $t0,$t1,$t0
    srdi    $a3,$a3,1        # most significant bit cleared

    subi    $rp,$rp,1
    subfc    $a0,$t0,$a0
    subfe    $a1,$zero,$a1
    subfe    $a2,$zero,$a2
    subfe    $a3,$zero,$a3

___
# Store each of the four 64-bit words one byte at a time, least significant
# byte first; stbu advances $rp by one on every store.
for (my @a=($a0,$a1,$a2,$a3), my $i=0; $i<4; shift(@a), $i++) {
$code.=<<___;
    srdi    $t0,@a[0],8
    stbu    @a[0],1($rp)
    srdi    @a[0],@a[0],16
    stbu    $t0,1($rp)
    srdi    $t0,@a[0],8
    stbu    @a[0],1($rp)
    srdi    @a[0],@a[0],16
    stbu    $t0,1($rp)
    srdi    $t0,@a[0],8
    stbu    @a[0],1($rp)
    srdi    @a[0],@a[0],16
    stbu    $t0,1($rp)
    srdi    $t0,@a[0],8
    stbu    @a[0],1($rp)
    stbu    $t0,1($rp)
___
}
$code.=<<___;
    blr
    .long    0
    .byte    0,12,0x14,0,0,0,2,0
    .long    0
.size    x25519_fe64_tobytes,.-x25519_fe64_tobytes
___
}
####################################################### base 2^51
# Base 2^51 code path: emits x25519_fe51_mul, x25519_fe51_sqr and
# x25519_fe51_mul121666, which all share the .Lfe51_reduce tail.
{
# Register names: $bi,$a0-$a4,$t0 are r6-r12; $t1 is r21 and
# $h0lo..$h4hi are r22-r31 (r21-r31 are saved by each prologue).
my ($bi,$a0,$a1,$a2,$a3,$a4,$t0, $t1,
    $h0lo,$h0hi,$h1lo,$h1hi,$h2lo,$h2hi,$h3lo,$h3hi,$h4lo,$h4hi) =
    map("r$_",(6..12,21..31));
# r0 is loaded with the 51-bit limb mask at .Lfe51_reduce.
my $mask = "r0";
# Stack frame size in bytes.
my $FRAME = 18*8;

# x25519_fe51_mul: prologue and the products with b[0].  b[1] is preloaded
# into the register currently named $ap, and a[4] is multiplied by 19.
$code.=<<___;
.text

.globl    x25519_fe51_mul
.type    x25519_fe51_mul,\@function
.align    5
x25519_fe51_mul:
    stdu    $sp,-$FRAME($sp)
    std    r21,`$FRAME-8*11`($sp)
    std    r22,`$FRAME-8*10`($sp)
    std    r23,`$FRAME-8*9`($sp)
    std    r24,`$FRAME-8*8`($sp)
    std    r25,`$FRAME-8*7`($sp)
    std    r26,`$FRAME-8*6`($sp)
    std    r27,`$FRAME-8*5`($sp)
    std    r28,`$FRAME-8*4`($sp)
    std    r29,`$FRAME-8*3`($sp)
    std    r30,`$FRAME-8*2`($sp)
    std    r31,`$FRAME-8*1`($sp)

    ld    $bi,0($bp)
    ld    $a0,0($ap)
    ld    $a1,8($ap)
    ld    $a2,16($ap)
    ld    $a3,24($ap)
    ld    $a4,32($ap)

    mulld    $h0lo,$a0,$bi        # a[0]*b[0]
    mulhdu    $h0hi,$a0,$bi

    mulld    $h1lo,$a1,$bi        # a[1]*b[0]
    mulhdu    $h1hi,$a1,$bi

     mulld    $h4lo,$a4,$bi        # a[4]*b[0]
     mulhdu    $h4hi,$a4,$bi
     ld    $ap,8($bp)
     mulli    $a4,$a4,19

    mulld    $h2lo,$a2,$bi        # a[2]*b[0]
    mulhdu    $h2hi,$a2,$bi

    mulld    $h3lo,$a3,$bi        # a[3]*b[0]
    mulhdu    $h3hi,$a3,$bi
___
# Rounds for b[1]..b[3].  Each round multiplies one more limb of a by 19
# (@a[3]) and then rotates @a so its last name moves to the front.
for(my @a=($a0,$a1,$a2,$a3,$a4),
    my $i=1; $i<4; $i++) {
    # Swap the Perl names: the register preloaded as $ap by the previous
    # round becomes the multiplier $bi, and the other one receives the
    # next limb of b while this round's multiplications are emitted.
    ($ap,$bi) = ($bi,$ap);
$code.=<<___;
    mulld    $t0,@a[4],$bi
    mulhdu    $t1,@a[4],$bi
    addc    $h0lo,$h0lo,$t0
    adde    $h0hi,$h0hi,$t1

    mulld    $t0,@a[0],$bi
    mulhdu    $t1,@a[0],$bi
    addc    $h1lo,$h1lo,$t0
    adde    $h1hi,$h1hi,$t1

     mulld    $t0,@a[3],$bi
     mulhdu    $t1,@a[3],$bi
     ld    $ap,`8*($i+1)`($bp)
     mulli    @a[3],@a[3],19
     addc    $h4lo,$h4lo,$t0
     adde    $h4hi,$h4hi,$t1

    mulld    $t0,@a[1],$bi
    mulhdu    $t1,@a[1],$bi
    addc    $h2lo,$h2lo,$t0
    adde    $h2hi,$h2hi,$t1

    mulld    $t0,@a[2],$bi
    mulhdu    $t1,@a[2],$bi
    addc    $h3lo,$h3lo,$t0
    adde    $h3hi,$h3hi,$t1
___
    unshift(@a,pop(@a));
}
    # Final swap: $bi now names the register loaded with b[4] by the last
    # round.  By this point a[1]..a[4] have all been multiplied by 19.
    ($ap,$bi) = ($bi,$ap);
# Last round (b[4]), the shared reduction tail .Lfe51_reduce, and the
# epilogue that restores r21-r31.
$code.=<<___;
    mulld    $t0,$a1,$bi
    mulhdu    $t1,$a1,$bi
    addc    $h0lo,$h0lo,$t0
    adde    $h0hi,$h0hi,$t1

    mulld    $t0,$a2,$bi
    mulhdu    $t1,$a2,$bi
    addc    $h1lo,$h1lo,$t0
    adde    $h1hi,$h1hi,$t1

    mulld    $t0,$a3,$bi
    mulhdu    $t1,$a3,$bi
    addc    $h2lo,$h2lo,$t0
    adde    $h2hi,$h2hi,$t1

    mulld    $t0,$a4,$bi
    mulhdu    $t1,$a4,$bi
    addc    $h3lo,$h3lo,$t0
    adde    $h3hi,$h3hi,$t1

    mulld    $t0,$a0,$bi
    mulhdu    $t1,$a0,$bi
    addc    $h4lo,$h4lo,$t0
    adde    $h4hi,$h4hi,$t1

.Lfe51_reduce:
    li    $mask,-1
    srdi    $mask,$mask,13        # 0x7ffffffffffff

    srdi    $t0,$h2lo,51
    and    $a2,$h2lo,$mask
    insrdi    $t0,$h2hi,51,0        # h2>>51
     srdi    $t1,$h0lo,51
     and    $a0,$h0lo,$mask
     insrdi    $t1,$h0hi,51,0        # h0>>51
    addc    $h3lo,$h3lo,$t0
    addze    $h3hi,$h3hi
     addc    $h1lo,$h1lo,$t1
     addze    $h1hi,$h1hi

    srdi    $t0,$h3lo,51
    and    $a3,$h3lo,$mask
    insrdi    $t0,$h3hi,51,0        # h3>>51
     srdi    $t1,$h1lo,51
     and    $a1,$h1lo,$mask
     insrdi    $t1,$h1hi,51,0        # h1>>51
    addc    $h4lo,$h4lo,$t0
    addze    $h4hi,$h4hi
     add    $a2,$a2,$t1

    srdi    $t0,$h4lo,51
    and    $a4,$h4lo,$mask
    insrdi    $t0,$h4hi,51,0
    mulli    $t0,$t0,19        # (h4 >> 51) * 19

    add    $a0,$a0,$t0

    srdi    $t1,$a2,51
    and    $a2,$a2,$mask
    add    $a3,$a3,$t1

    srdi    $t0,$a0,51
    and    $a0,$a0,$mask
    add    $a1,$a1,$t0

    std    $a2,16($rp)
    std    $a3,24($rp)
    std    $a4,32($rp)
    std    $a0,0($rp)
    std    $a1,8($rp)

    ld    r21,`$FRAME-8*11`($sp)
    ld    r22,`$FRAME-8*10`($sp)
    ld    r23,`$FRAME-8*9`($sp)
    ld    r24,`$FRAME-8*8`($sp)
    ld    r25,`$FRAME-8*7`($sp)
    ld    r26,`$FRAME-8*6`($sp)
    ld    r27,`$FRAME-8*5`($sp)
    ld    r28,`$FRAME-8*4`($sp)
    ld    r29,`$FRAME-8*3`($sp)
    ld    r30,`$FRAME-8*2`($sp)
    ld    r31,`$FRAME-8*1`($sp)
    addi    $sp,$sp,$FRAME
    blr
    .long    0
    .byte    0,12,4,0,0x80,11,3,0
    .long    0
.size    x25519_fe51_mul,.-x25519_fe51_mul
___
{
# Private copies of the register names: the swaps below rename registers
# for x25519_fe51_sqr only, leaving the outer names intact for the code
# emitted after this inner block.
my ($a0,$a1,$a2,$a3,$a4,$t0,$t1) = ($a0,$a1,$a2,$a3,$a4,$t0,$t1);
$code.=<<___;
.globl    x25519_fe51_sqr
.type    x25519_fe51_sqr,\@function
.align    5
x25519_fe51_sqr:
    stdu    $sp,-$FRAME($sp)
    std    r21,`$FRAME-8*11`($sp)
    std    r22,`$FRAME-8*10`($sp)
    std    r23,`$FRAME-8*9`($sp)
    std    r24,`$FRAME-8*8`($sp)
    std    r25,`$FRAME-8*7`($sp)
    std    r26,`$FRAME-8*6`($sp)
    std    r27,`$FRAME-8*5`($sp)
    std    r28,`$FRAME-8*4`($sp)
    std    r29,`$FRAME-8*3`($sp)
    std    r30,`$FRAME-8*2`($sp)
    std    r31,`$FRAME-8*1`($sp)

    ld    $a0,0($ap)
    ld    $a1,8($ap)
    ld    $a2,16($ap)
    ld    $a3,24($ap)
    ld    $a4,32($ap)

    add    $bi,$a0,$a0        # a[0]*2
    mulli    $t1,$a4,19        # a[4]*19

    mulld    $h0lo,$a0,$a0
    mulhdu    $h0hi,$a0,$a0
    mulld    $h1lo,$a1,$bi
    mulhdu    $h1hi,$a1,$bi
    mulld    $h2lo,$a2,$bi
    mulhdu    $h2hi,$a2,$bi
    mulld    $h3lo,$a3,$bi
    mulhdu    $h3hi,$a3,$bi
    mulld    $h4lo,$a4,$bi
    mulhdu    $h4hi,$a4,$bi
    add    $bi,$a1,$a1        # a[1]*2
___
    # From here on $a4 names the register holding a[4]*19 and $t1 names
    # the register holding the original a[4].
    ($a4,$t1) = ($t1,$a4);
$code.=<<___;
    mulld    $t0,$t1,$a4
    mulhdu    $t1,$t1,$a4
    addc    $h3lo,$h3lo,$t0
    adde    $h3hi,$h3hi,$t1

    mulli    $bp,$a3,19        # a[3]*19

    mulld    $t0,$a1,$a1
    mulhdu    $t1,$a1,$a1
    addc    $h2lo,$h2lo,$t0
    adde    $h2hi,$h2hi,$t1
    mulld    $t0,$a2,$bi
    mulhdu    $t1,$a2,$bi
    addc    $h3lo,$h3lo,$t0
    adde    $h3hi,$h3hi,$t1
    mulld    $t0,$a3,$bi
    mulhdu    $t1,$a3,$bi
    addc    $h4lo,$h4lo,$t0
    adde    $h4hi,$h4hi,$t1
    mulld    $t0,$a4,$bi
    mulhdu    $t1,$a4,$bi
    add    $bi,$a3,$a3        # a[3]*2
    addc    $h0lo,$h0lo,$t0
    adde    $h0hi,$h0hi,$t1
___
    # Likewise $a3 now names the register holding a[3]*19 (the one named
    # $bp) and $t1 names the register holding the original a[3].
    ($a3,$t1) = ($bp,$a3);
$code.=<<___;
    mulld    $t0,$t1,$a3
    mulhdu    $t1,$t1,$a3
    addc    $h1lo,$h1lo,$t0
    adde    $h1hi,$h1hi,$t1
    mulld    $t0,$bi,$a4
    mulhdu    $t1,$bi,$a4
    add    $bi,$a2,$a2        # a[2]*2
    addc    $h2lo,$h2lo,$t0
    adde    $h2hi,$h2hi,$t1

    mulld    $t0,$a2,$a2
    mulhdu    $t1,$a2,$a2
    addc    $h4lo,$h4lo,$t0
    adde    $h4hi,$h4hi,$t1
    mulld    $t0,$a3,$bi
    mulhdu    $t1,$a3,$bi
    addc    $h0lo,$h0lo,$t0
    adde    $h0hi,$h0hi,$t1
    mulld    $t0,$a4,$bi
    mulhdu    $t1,$a4,$bi
    addc    $h1lo,$h1lo,$t0
    adde    $h1hi,$h1hi,$t1

    b    .Lfe51_reduce
    .long    0
    .byte    0,12,4,0,0x80,11,2,0
    .long    0
.size    x25519_fe51_sqr,.-x25519_fe51_sqr
___
}
# x25519_fe51_mul121666: multiply every limb by 121666, then branch to the
# shared reduction tail.
$code.=<<___;
.globl    x25519_fe51_mul121666
.type    x25519_fe51_mul121666,\@function
.align    5
x25519_fe51_mul121666:
    stdu    $sp,-$FRAME($sp)
    std    r21,`$FRAME-8*11`($sp)
    std    r22,`$FRAME-8*10`($sp)
    std    r23,`$FRAME-8*9`($sp)
    std    r24,`$FRAME-8*8`($sp)
    std    r25,`$FRAME-8*7`($sp)
    std    r26,`$FRAME-8*6`($sp)
    std    r27,`$FRAME-8*5`($sp)
    std    r28,`$FRAME-8*4`($sp)
    std    r29,`$FRAME-8*3`($sp)
    std    r30,`$FRAME-8*2`($sp)
    std    r31,`$FRAME-8*1`($sp)

    lis    $bi,`65536>>16`
    ori    $bi,$bi,`121666-65536`
    ld    $a0,0($ap)
    ld    $a1,8($ap)
    ld    $a2,16($ap)
    ld    $a3,24($ap)
    ld    $a4,32($ap)

    mulld    $h0lo,$a0,$bi        # a[0]*121666
    mulhdu    $h0hi,$a0,$bi
    mulld    $h1lo,$a1,$bi        # a[1]*121666
    mulhdu    $h1hi,$a1,$bi
    mulld    $h2lo,$a2,$bi        # a[2]*121666
    mulhdu    $h2hi,$a2,$bi
    mulld    $h3lo,$a3,$bi        # a[3]*121666
    mulhdu    $h3hi,$a3,$bi
    mulld    $h4lo,$a4,$bi        # a[4]*121666
    mulhdu    $h4hi,$a4,$bi

    b    .Lfe51_reduce
    .long    0
    .byte    0,12,4,0,0x80,11,2,0
    .long    0
.size    x25519_fe51_mul121666,.-x25519_fe51_mul121666
___
}

# Expand every `expression` in the generated text to its evaluated value,
# write the result to STDOUT and fail loudly if the handle does not close
# cleanly.
$code =~ s/\`([^\`]*)\`/eval $1/gem;
print STDOUT $code;
close(STDOUT) || die "error closing STDOUT: $!";
:: Command execute ::
Enter:	Select: