Merge remote-tracking branch 'origin/master' into release-1.14

Kubernetes-commit: 2676471cfcb269267a16a2c7cc2df369944ffcc2
This commit is contained in:
Kubernetes Publisher
2019-03-08 09:03:33 +00:00
parent d4100cdc44
commit 68a6c9bc69
365 changed files with 59718 additions and 269 deletions
+80
View File
@@ -154,6 +154,86 @@
"ImportPath": "golang.org/x/tools/internal/semver",
"Rev": "7f7074d5bcfd282eb16bc382b0bb3da762461985"
},
{
"ImportPath": "gonum.org/v1/gonum/blas",
"Rev": "cebdade430ccb61c1feba4878085f6cf8cb3320e"
},
{
"ImportPath": "gonum.org/v1/gonum/blas/blas64",
"Rev": "cebdade430ccb61c1feba4878085f6cf8cb3320e"
},
{
"ImportPath": "gonum.org/v1/gonum/blas/gonum",
"Rev": "cebdade430ccb61c1feba4878085f6cf8cb3320e"
},
{
"ImportPath": "gonum.org/v1/gonum/floats",
"Rev": "cebdade430ccb61c1feba4878085f6cf8cb3320e"
},
{
"ImportPath": "gonum.org/v1/gonum/graph",
"Rev": "cebdade430ccb61c1feba4878085f6cf8cb3320e"
},
{
"ImportPath": "gonum.org/v1/gonum/graph/internal/linear",
"Rev": "cebdade430ccb61c1feba4878085f6cf8cb3320e"
},
{
"ImportPath": "gonum.org/v1/gonum/graph/internal/ordered",
"Rev": "cebdade430ccb61c1feba4878085f6cf8cb3320e"
},
{
"ImportPath": "gonum.org/v1/gonum/graph/internal/set",
"Rev": "cebdade430ccb61c1feba4878085f6cf8cb3320e"
},
{
"ImportPath": "gonum.org/v1/gonum/graph/internal/uid",
"Rev": "cebdade430ccb61c1feba4878085f6cf8cb3320e"
},
{
"ImportPath": "gonum.org/v1/gonum/graph/simple",
"Rev": "cebdade430ccb61c1feba4878085f6cf8cb3320e"
},
{
"ImportPath": "gonum.org/v1/gonum/graph/topo",
"Rev": "cebdade430ccb61c1feba4878085f6cf8cb3320e"
},
{
"ImportPath": "gonum.org/v1/gonum/graph/traverse",
"Rev": "cebdade430ccb61c1feba4878085f6cf8cb3320e"
},
{
"ImportPath": "gonum.org/v1/gonum/internal/asm/c128",
"Rev": "cebdade430ccb61c1feba4878085f6cf8cb3320e"
},
{
"ImportPath": "gonum.org/v1/gonum/internal/asm/f32",
"Rev": "cebdade430ccb61c1feba4878085f6cf8cb3320e"
},
{
"ImportPath": "gonum.org/v1/gonum/internal/asm/f64",
"Rev": "cebdade430ccb61c1feba4878085f6cf8cb3320e"
},
{
"ImportPath": "gonum.org/v1/gonum/internal/math32",
"Rev": "cebdade430ccb61c1feba4878085f6cf8cb3320e"
},
{
"ImportPath": "gonum.org/v1/gonum/lapack",
"Rev": "cebdade430ccb61c1feba4878085f6cf8cb3320e"
},
{
"ImportPath": "gonum.org/v1/gonum/lapack/gonum",
"Rev": "cebdade430ccb61c1feba4878085f6cf8cb3320e"
},
{
"ImportPath": "gonum.org/v1/gonum/lapack/lapack64",
"Rev": "cebdade430ccb61c1feba4878085f6cf8cb3320e"
},
{
"ImportPath": "gonum.org/v1/gonum/mat",
"Rev": "cebdade430ccb61c1feba4878085f6cf8cb3320e"
},
{
"ImportPath": "k8s.io/gengo/args",
"Rev": "51747d6e00da1fc578d5a333a93bb2abcbce7a95"
+81 -2
View File
@@ -25,16 +25,20 @@ import (
"os"
"os/exec"
"path/filepath"
"sort"
"strings"
flag "github.com/spf13/pflag"
"gonum.org/v1/gonum/graph"
"gonum.org/v1/gonum/graph/simple"
"gonum.org/v1/gonum/graph/topo"
"k8s.io/code-generator/pkg/util"
"k8s.io/gengo/args"
"k8s.io/gengo/generator"
"k8s.io/gengo/namer"
"k8s.io/gengo/parser"
"k8s.io/gengo/types"
flag "github.com/spf13/pflag"
)
type Generator struct {
@@ -202,6 +206,18 @@ func Run(g *Generator) {
c.Verify = g.Common.VerifyOnly
c.FileTypes["protoidl"] = NewProtoFile()
// order package by imports, importees first
deps := deps(c, protobufNames.packages)
order, err := importOrder(deps)
if err != nil {
log.Fatalf("Failed to order packages by imports: %v", err)
}
topologicalPos := map[string]int{}
for i, p := range order {
topologicalPos[p] = i
}
sort.Sort(positionOrder{topologicalPos, protobufNames.packages})
var vendoredOutputPackages, localOutputPackages generator.Packages
for _, p := range protobufNames.packages {
if _, ok := nonOutputPackages[p.Name()]; ok {
@@ -347,3 +363,66 @@ func Run(g *Generator) {
}
}
}
func deps(c *generator.Context, pkgs []*protobufPackage) map[string][]string {
ret := map[string][]string{}
for _, p := range pkgs {
for _, d := range c.Universe[p.PackagePath].Imports {
ret[p.PackagePath] = append(ret[p.PackagePath], d.Path)
}
}
return ret
}
func importOrder(deps map[string][]string) ([]string, error) {
nodes := map[string]graph.Node{}
names := map[int64]string{}
g := simple.NewDirectedGraph()
for pkg, imports := range deps {
for _, imp := range imports {
if _, found := nodes[pkg]; !found {
n := g.NewNode()
g.AddNode(n)
nodes[pkg] = n
names[n.ID()] = pkg
}
if _, found := nodes[imp]; !found {
n := g.NewNode()
g.AddNode(n)
nodes[imp] = n
names[n.ID()] = imp
}
g.SetEdge(g.NewEdge(nodes[imp], nodes[pkg]))
}
}
ret := []string{}
sorted, err := topo.Sort(g)
if err != nil {
return nil, err
}
for _, n := range sorted {
ret = append(ret, names[n.ID()])
fmt.Println("topological order", names[n.ID()])
}
return ret, nil
}
type positionOrder struct {
pos map[string]int
elements []*protobufPackage
}
func (o positionOrder) Len() int {
return len(o.elements)
}
func (o positionOrder) Less(i, j int) bool {
return o.pos[o.elements[i].PackagePath] < o.pos[o.elements[j].PackagePath]
}
func (o positionOrder) Swap(i, j int) {
x := o.elements[i]
o.elements[i] = o.elements[j]
o.elements[j] = x
}
+70
View File
@@ -0,0 +1,70 @@
# This is the official list of gonum authors for copyright purposes.
# This file is distinct from the CONTRIBUTORS files.
# See the latter for an explanation.
# Names should be added to this file as
# Name or Organization <email address>
# The email address is not required for organizations.
# Please keep the list sorted.
Brendan Tracey <tracey.brendan@gmail.com>
Bill Gray <wgray@gogray.com>
Bill Noon <noon.bill@gmail.com>
Chad Kunde <kunde21@gmail.com>
Chih-Wei Chang <bert.cwchang@gmail.com>
Chris Tessum <ctessum@gmail.com>
Dan Kortschak <dan.kortschak@adelaide.edu.au> <dan@kortschak.io>
Daniel Fireman <danielfireman@gmail.com>
David Samborski <bloggingarrow@gmail.com>
Davor Kapsa <davor.kapsa@gmail.com>
Egon Elbre <egonelbre@gmail.com>
Ekaterina Efimova <katerina.efimova@gmail.com>
Ethan Burns <burns.ethan@gmail.com>
Evert Lammerts <evert.lammerts@gmail.com>
Facundo Gaich <facugaich@gmail.com>
Fazlul Shahriar <fshahriar@gmail.com>
Francesc Campoy <campoy@golang.org>
Google Inc
Gustaf Johansson <gustaf@pinon.se>
Iakov Davydov <iakov.davydov@unil.ch>
Jalem Raj Rohit <jrajrohit33@gmail.com>
James Bell <james@stellentus.com>
James Bowman <james.edward.bowman@gmail.com>
James Holmes <32bitkid@gmail.com>
Janne Snabb <snabb@epipe.com>
Jeff Juozapaitis <jjjuozap@email.arizona.edu>
Jeremy Atkinson <jchatkinson@gmail.com>
Jonas Kahler <jonas@derkahler.de>
Jonathan J Lawlor <jonathan.lawlor@gmail.com>
Jonathan Schroeder <jd.schroeder@gmail.com>
Joseph Watson <jtwatson@linux-consulting.us>
Josh Wilson <josh.craig.wilson@gmail.com>
Julien Roland <juroland@gmail.com>
Kent English <kent.english@gmail.com>
Kevin C. Zimmerman <kevinczimmerman@gmail.com>
Konstantin Shaposhnikov <k.shaposhnikov@gmail.com>
Leonid Kneller <recondite.matter@gmail.com>
Lyron Winderbaum <lyron.winderbaum@student.adelaide.edu.au>
Matthieu Di Mercurio <matthieu.dimercurio@gmail.com>
Max Halford <maxhalford25@gmail.com>
MinJae Kwon <k239507@gmail.com>
Or Rikon <rikonor@gmail.com>
Pontus Melke <pontusmelke@gmail.com>
Renée French
Robin Eklind <r.eklind.87@gmail.com>
Samuel Kelemen <Samuel@Kelemen.us>
Sam Zaydel <szaydel@gmail.com>
Scott Holden <scott@sshconnection.com>
Sebastien Binet <seb.binet@gmail.com>
source{d} <hello@sourced.tech>
Shawn Smith <shawnpsmith@gmail.com>
Spencer Lyon <spencerlyon2@gmail.com>
Steve McCoy <mccoyst@gmail.com>
Takeshi Yoneda <cz.rk.t0415y.g@gmail.com>
The University of Adelaide
The University of Minnesota
The University of Washington
Tobin Harding <me@tobin.cc>
Vladimír Chalupecký <vladimir.chalupecky@gmail.com>
Yevgeniy Vahlis <evahlis@gmail.com>
+73
View File
@@ -0,0 +1,73 @@
# This is the official list of people who can contribute
# (and typically have contributed) code to the gonum
# repository.
#
# The AUTHORS file lists the copyright holders; this file
# lists people. For example, Google employees would be listed here
# but not in AUTHORS, because Google would hold the copyright.
#
# When adding J Random Contributor's name to this file,
# either J's name or J's organization's name should be
# added to the AUTHORS file.
#
# Names should be added to this file like so:
# Name <email address>
#
# Please keep the list sorted.
Andrew Brampton <brampton@gmail.com>
Brendan Tracey <tracey.brendan@gmail.com>
Bill Gray <wgray@gogray.com>
Bill Noon <noon.bill@gmail.com>
Chad Kunde <kunde21@gmail.com>
Chih-Wei Chang <bert.cwchang@gmail.com>
Chris Tessum <ctessum@gmail.com>
Dan Kortschak <dan.kortschak@adelaide.edu.au> <dan@kortschak.io>
Daniel Fireman <danielfireman@gmail.com>
David Samborski <bloggingarrow@gmail.com>
Davor Kapsa <davor.kapsa@gmail.com>
Egon Elbre <egonelbre@gmail.com>
Ekaterina Efimova <katerina.efimova@gmail.com>
Ethan Burns <burns.ethan@gmail.com>
Evert Lammerts <evert.lammerts@gmail.com>
Facundo Gaich <facugaich@gmail.com>
Fazlul Shahriar <fshahriar@gmail.com>
Francesc Campoy <campoy@golang.org>
Gustaf Johansson <gustaf@pinon.se>
Iakov Davydov <iakov.davydov@unil.ch>
Jalem Raj Rohit <jrajrohit33@gmail.com>
James Bell <james@stellentus.com>
James Bowman <james.edward.bowman@gmail.com>
James Holmes <32bitkid@gmail.com>
Janne Snabb <snabb@epipe.com>
Jeff Juozapaitis <jjjuozap@email.arizona.edu>
Jeremy Atkinson <jchatkinson@gmail.com>
Jonas Kahler <jonas@derkahler.de>
Jonathan J Lawlor <jonathan.lawlor@gmail.com>
Jonathan Schroeder <jd.schroeder@gmail.com>
Joseph Watson <jtwatson@linux-consulting.us>
Josh Wilson <josh.craig.wilson@gmail.com>
Julien Roland <juroland@gmail.com>
Kent English <kent.english@gmail.com>
Kevin C. Zimmerman <kevinczimmerman@gmail.com>
Konstantin Shaposhnikov <k.shaposhnikov@gmail.com>
Leonid Kneller <recondite.matter@gmail.com>
Lyron Winderbaum <lyron.winderbaum@student.adelaide.edu.au>
Matthieu Di Mercurio <matthieu.dimercurio@gmail.com>
Max Halford <maxhalford25@gmail.com>
MinJae Kwon <k239507@gmail.com>
Or Rikon <rikonor@gmail.com>
Pontus Melke <pontusmelke@gmail.com>
Renée French
Robin Eklind <r.eklind.87@gmail.com>
Samuel Kelemen <Samuel@Kelemen.us>
Sam Zaydel <szaydel@gmail.com>
Scott Holden <scott@sshconnection.com>
Sebastien Binet <seb.binet@gmail.com>
Shawn Smith <shawnpsmith@gmail.com>
Spencer Lyon <spencerlyon2@gmail.com>
Steve McCoy <mccoyst@gmail.com>
Takeshi Yoneda <cz.rk.t0415y.g@gmail.com>
Tobin Harding <me@tobin.cc>
Vladimír Chalupecký <vladimir.chalupecky@gmail.com>
Yevgeniy Vahlis <evahlis@gmail.com>
+23
View File
@@ -0,0 +1,23 @@
Copyright ©2013 The Gonum Authors. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the gonum project nor the names of its authors and
contributors may be used to endorse or promote products derived from this
software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+47
View File
@@ -0,0 +1,47 @@
# Gonum BLAS [![GoDoc](https://godoc.org/gonum.org/v1/gonum/blas?status.svg)](https://godoc.org/gonum.org/v1/gonum/blas)
A collection of packages to provide BLAS functionality for the [Go programming
language](http://golang.org)
## Installation
```sh
go get gonum.org/v1/gonum/blas/...
```
## Packages
### blas
Defines [BLAS API](http://www.netlib.org/blas/blast-forum/cinterface.pdf) split in several
interfaces.
### blas/gonum
Go implementation of the BLAS API (incomplete, implements the `float32` and `float64` API).
### blas/blas64 and blas/blas32
Wrappers for an implementation of the double (i.e., `float64`) and single (`float32`)
precision real parts of the BLAS API.
```Go
package main
import (
"fmt"
"gonum.org/v1/gonum/blas/blas64"
)
func main() {
v := blas64.Vector{Inc: 1, Data: []float64{1, 1, 1}}
fmt.Println("v has length:", blas64.Nrm2(len(v.Data), v))
}
```
### blas/cblas128 and blas/cblas64
Wrappers for an implementation of the double (i.e., `complex128`) and single (`complex64`)
precision complex parts of the blas API.
Currently blas/cblas64 and blas/cblas128 require gonum.org/v1/netlib/blas.
+287
View File
@@ -0,0 +1,287 @@
// Copyright ©2013 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:generate ./conversions.bash
package blas
// Flag constants indicate Givens transformation H matrix state.
type Flag int
const (
Identity Flag = -2 // H is the identity matrix; no rotation is needed.
Rescaling Flag = -1 // H specifies rescaling.
OffDiagonal Flag = 0 // Off-diagonal elements of H are non-unit.
Diagonal Flag = 1 // Diagonal elements of H are non-unit.
)
// SrotmParams contains Givens transformation parameters returned
// by the Float32 Srotm method.
type SrotmParams struct {
Flag
H [4]float32 // Column-major 2 by 2 matrix.
}
// DrotmParams contains Givens transformation parameters returned
// by the Float64 Drotm method.
type DrotmParams struct {
Flag
H [4]float64 // Column-major 2 by 2 matrix.
}
// Transpose is used to specify the transposition operation for a
// routine.
type Transpose int
const (
NoTrans Transpose = 111 + iota
Trans
ConjTrans
)
// Uplo is used to specify whether the matrix is an upper or lower
// triangular matrix.
type Uplo int
const (
All Uplo = 120 + iota
Upper
Lower
)
// Diag is used to specify whether the matrix is a unit or non-unit
// triangular matrix.
type Diag int
const (
NonUnit Diag = 131 + iota
Unit
)
// Side is used to specify from which side a multiplication operation
// is performed.
type Side int
const (
Left Side = 141 + iota
Right
)
// Float32 implements the single precision real BLAS routines.
type Float32 interface {
Float32Level1
Float32Level2
Float32Level3
}
// Float32Level1 implements the single precision real BLAS Level 1 routines.
type Float32Level1 interface {
Sdsdot(n int, alpha float32, x []float32, incX int, y []float32, incY int) float32
Dsdot(n int, x []float32, incX int, y []float32, incY int) float64
Sdot(n int, x []float32, incX int, y []float32, incY int) float32
Snrm2(n int, x []float32, incX int) float32
Sasum(n int, x []float32, incX int) float32
Isamax(n int, x []float32, incX int) int
Sswap(n int, x []float32, incX int, y []float32, incY int)
Scopy(n int, x []float32, incX int, y []float32, incY int)
Saxpy(n int, alpha float32, x []float32, incX int, y []float32, incY int)
Srotg(a, b float32) (c, s, r, z float32)
Srotmg(d1, d2, b1, b2 float32) (p SrotmParams, rd1, rd2, rb1 float32)
Srot(n int, x []float32, incX int, y []float32, incY int, c, s float32)
Srotm(n int, x []float32, incX int, y []float32, incY int, p SrotmParams)
Sscal(n int, alpha float32, x []float32, incX int)
}
// Float32Level2 implements the single precision real BLAS Level 2 routines.
type Float32Level2 interface {
Sgemv(tA Transpose, m, n int, alpha float32, a []float32, lda int, x []float32, incX int, beta float32, y []float32, incY int)
Sgbmv(tA Transpose, m, n, kL, kU int, alpha float32, a []float32, lda int, x []float32, incX int, beta float32, y []float32, incY int)
Strmv(ul Uplo, tA Transpose, d Diag, n int, a []float32, lda int, x []float32, incX int)
Stbmv(ul Uplo, tA Transpose, d Diag, n, k int, a []float32, lda int, x []float32, incX int)
Stpmv(ul Uplo, tA Transpose, d Diag, n int, ap []float32, x []float32, incX int)
Strsv(ul Uplo, tA Transpose, d Diag, n int, a []float32, lda int, x []float32, incX int)
Stbsv(ul Uplo, tA Transpose, d Diag, n, k int, a []float32, lda int, x []float32, incX int)
Stpsv(ul Uplo, tA Transpose, d Diag, n int, ap []float32, x []float32, incX int)
Ssymv(ul Uplo, n int, alpha float32, a []float32, lda int, x []float32, incX int, beta float32, y []float32, incY int)
Ssbmv(ul Uplo, n, k int, alpha float32, a []float32, lda int, x []float32, incX int, beta float32, y []float32, incY int)
Sspmv(ul Uplo, n int, alpha float32, ap []float32, x []float32, incX int, beta float32, y []float32, incY int)
Sger(m, n int, alpha float32, x []float32, incX int, y []float32, incY int, a []float32, lda int)
Ssyr(ul Uplo, n int, alpha float32, x []float32, incX int, a []float32, lda int)
Sspr(ul Uplo, n int, alpha float32, x []float32, incX int, ap []float32)
Ssyr2(ul Uplo, n int, alpha float32, x []float32, incX int, y []float32, incY int, a []float32, lda int)
Sspr2(ul Uplo, n int, alpha float32, x []float32, incX int, y []float32, incY int, a []float32)
}
// Float32Level3 implements the single precision real BLAS Level 3 routines.
type Float32Level3 interface {
Sgemm(tA, tB Transpose, m, n, k int, alpha float32, a []float32, lda int, b []float32, ldb int, beta float32, c []float32, ldc int)
Ssymm(s Side, ul Uplo, m, n int, alpha float32, a []float32, lda int, b []float32, ldb int, beta float32, c []float32, ldc int)
Ssyrk(ul Uplo, t Transpose, n, k int, alpha float32, a []float32, lda int, beta float32, c []float32, ldc int)
Ssyr2k(ul Uplo, t Transpose, n, k int, alpha float32, a []float32, lda int, b []float32, ldb int, beta float32, c []float32, ldc int)
Strmm(s Side, ul Uplo, tA Transpose, d Diag, m, n int, alpha float32, a []float32, lda int, b []float32, ldb int)
Strsm(s Side, ul Uplo, tA Transpose, d Diag, m, n int, alpha float32, a []float32, lda int, b []float32, ldb int)
}
// Float64 implements the single precision real BLAS routines.
type Float64 interface {
Float64Level1
Float64Level2
Float64Level3
}
// Float64Level1 implements the double precision real BLAS Level 1 routines.
type Float64Level1 interface {
Ddot(n int, x []float64, incX int, y []float64, incY int) float64
Dnrm2(n int, x []float64, incX int) float64
Dasum(n int, x []float64, incX int) float64
Idamax(n int, x []float64, incX int) int
Dswap(n int, x []float64, incX int, y []float64, incY int)
Dcopy(n int, x []float64, incX int, y []float64, incY int)
Daxpy(n int, alpha float64, x []float64, incX int, y []float64, incY int)
Drotg(a, b float64) (c, s, r, z float64)
Drotmg(d1, d2, b1, b2 float64) (p DrotmParams, rd1, rd2, rb1 float64)
Drot(n int, x []float64, incX int, y []float64, incY int, c float64, s float64)
Drotm(n int, x []float64, incX int, y []float64, incY int, p DrotmParams)
Dscal(n int, alpha float64, x []float64, incX int)
}
// Float64Level2 implements the double precision real BLAS Level 2 routines.
type Float64Level2 interface {
Dgemv(tA Transpose, m, n int, alpha float64, a []float64, lda int, x []float64, incX int, beta float64, y []float64, incY int)
Dgbmv(tA Transpose, m, n, kL, kU int, alpha float64, a []float64, lda int, x []float64, incX int, beta float64, y []float64, incY int)
Dtrmv(ul Uplo, tA Transpose, d Diag, n int, a []float64, lda int, x []float64, incX int)
Dtbmv(ul Uplo, tA Transpose, d Diag, n, k int, a []float64, lda int, x []float64, incX int)
Dtpmv(ul Uplo, tA Transpose, d Diag, n int, ap []float64, x []float64, incX int)
Dtrsv(ul Uplo, tA Transpose, d Diag, n int, a []float64, lda int, x []float64, incX int)
Dtbsv(ul Uplo, tA Transpose, d Diag, n, k int, a []float64, lda int, x []float64, incX int)
Dtpsv(ul Uplo, tA Transpose, d Diag, n int, ap []float64, x []float64, incX int)
Dsymv(ul Uplo, n int, alpha float64, a []float64, lda int, x []float64, incX int, beta float64, y []float64, incY int)
Dsbmv(ul Uplo, n, k int, alpha float64, a []float64, lda int, x []float64, incX int, beta float64, y []float64, incY int)
Dspmv(ul Uplo, n int, alpha float64, ap []float64, x []float64, incX int, beta float64, y []float64, incY int)
Dger(m, n int, alpha float64, x []float64, incX int, y []float64, incY int, a []float64, lda int)
Dsyr(ul Uplo, n int, alpha float64, x []float64, incX int, a []float64, lda int)
Dspr(ul Uplo, n int, alpha float64, x []float64, incX int, ap []float64)
Dsyr2(ul Uplo, n int, alpha float64, x []float64, incX int, y []float64, incY int, a []float64, lda int)
Dspr2(ul Uplo, n int, alpha float64, x []float64, incX int, y []float64, incY int, a []float64)
}
// Float64Level3 implements the double precision real BLAS Level 3 routines.
type Float64Level3 interface {
Dgemm(tA, tB Transpose, m, n, k int, alpha float64, a []float64, lda int, b []float64, ldb int, beta float64, c []float64, ldc int)
Dsymm(s Side, ul Uplo, m, n int, alpha float64, a []float64, lda int, b []float64, ldb int, beta float64, c []float64, ldc int)
Dsyrk(ul Uplo, t Transpose, n, k int, alpha float64, a []float64, lda int, beta float64, c []float64, ldc int)
Dsyr2k(ul Uplo, t Transpose, n, k int, alpha float64, a []float64, lda int, b []float64, ldb int, beta float64, c []float64, ldc int)
Dtrmm(s Side, ul Uplo, tA Transpose, d Diag, m, n int, alpha float64, a []float64, lda int, b []float64, ldb int)
Dtrsm(s Side, ul Uplo, tA Transpose, d Diag, m, n int, alpha float64, a []float64, lda int, b []float64, ldb int)
}
// Complex64 implements the single precision complex BLAS routines.
type Complex64 interface {
Complex64Level1
Complex64Level2
Complex64Level3
}
// Complex64Level1 implements the single precision complex BLAS Level 1 routines.
type Complex64Level1 interface {
Cdotu(n int, x []complex64, incX int, y []complex64, incY int) (dotu complex64)
Cdotc(n int, x []complex64, incX int, y []complex64, incY int) (dotc complex64)
Scnrm2(n int, x []complex64, incX int) float32
Scasum(n int, x []complex64, incX int) float32
Icamax(n int, x []complex64, incX int) int
Cswap(n int, x []complex64, incX int, y []complex64, incY int)
Ccopy(n int, x []complex64, incX int, y []complex64, incY int)
Caxpy(n int, alpha complex64, x []complex64, incX int, y []complex64, incY int)
Cscal(n int, alpha complex64, x []complex64, incX int)
Csscal(n int, alpha float32, x []complex64, incX int)
}
// Complex64Level2 implements the single precision complex BLAS routines Level 2 routines.
type Complex64Level2 interface {
Cgemv(tA Transpose, m, n int, alpha complex64, a []complex64, lda int, x []complex64, incX int, beta complex64, y []complex64, incY int)
Cgbmv(tA Transpose, m, n, kL, kU int, alpha complex64, a []complex64, lda int, x []complex64, incX int, beta complex64, y []complex64, incY int)
Ctrmv(ul Uplo, tA Transpose, d Diag, n int, a []complex64, lda int, x []complex64, incX int)
Ctbmv(ul Uplo, tA Transpose, d Diag, n, k int, a []complex64, lda int, x []complex64, incX int)
Ctpmv(ul Uplo, tA Transpose, d Diag, n int, ap []complex64, x []complex64, incX int)
Ctrsv(ul Uplo, tA Transpose, d Diag, n int, a []complex64, lda int, x []complex64, incX int)
Ctbsv(ul Uplo, tA Transpose, d Diag, n, k int, a []complex64, lda int, x []complex64, incX int)
Ctpsv(ul Uplo, tA Transpose, d Diag, n int, ap []complex64, x []complex64, incX int)
Chemv(ul Uplo, n int, alpha complex64, a []complex64, lda int, x []complex64, incX int, beta complex64, y []complex64, incY int)
Chbmv(ul Uplo, n, k int, alpha complex64, a []complex64, lda int, x []complex64, incX int, beta complex64, y []complex64, incY int)
Chpmv(ul Uplo, n int, alpha complex64, ap []complex64, x []complex64, incX int, beta complex64, y []complex64, incY int)
Cgeru(m, n int, alpha complex64, x []complex64, incX int, y []complex64, incY int, a []complex64, lda int)
Cgerc(m, n int, alpha complex64, x []complex64, incX int, y []complex64, incY int, a []complex64, lda int)
Cher(ul Uplo, n int, alpha float32, x []complex64, incX int, a []complex64, lda int)
Chpr(ul Uplo, n int, alpha float32, x []complex64, incX int, a []complex64)
Cher2(ul Uplo, n int, alpha complex64, x []complex64, incX int, y []complex64, incY int, a []complex64, lda int)
Chpr2(ul Uplo, n int, alpha complex64, x []complex64, incX int, y []complex64, incY int, ap []complex64)
}
// Complex64Level3 implements the single precision complex BLAS Level 3 routines.
type Complex64Level3 interface {
Cgemm(tA, tB Transpose, m, n, k int, alpha complex64, a []complex64, lda int, b []complex64, ldb int, beta complex64, c []complex64, ldc int)
Csymm(s Side, ul Uplo, m, n int, alpha complex64, a []complex64, lda int, b []complex64, ldb int, beta complex64, c []complex64, ldc int)
Csyrk(ul Uplo, t Transpose, n, k int, alpha complex64, a []complex64, lda int, beta complex64, c []complex64, ldc int)
Csyr2k(ul Uplo, t Transpose, n, k int, alpha complex64, a []complex64, lda int, b []complex64, ldb int, beta complex64, c []complex64, ldc int)
Ctrmm(s Side, ul Uplo, tA Transpose, d Diag, m, n int, alpha complex64, a []complex64, lda int, b []complex64, ldb int)
Ctrsm(s Side, ul Uplo, tA Transpose, d Diag, m, n int, alpha complex64, a []complex64, lda int, b []complex64, ldb int)
Chemm(s Side, ul Uplo, m, n int, alpha complex64, a []complex64, lda int, b []complex64, ldb int, beta complex64, c []complex64, ldc int)
Cherk(ul Uplo, t Transpose, n, k int, alpha float32, a []complex64, lda int, beta float32, c []complex64, ldc int)
Cher2k(ul Uplo, t Transpose, n, k int, alpha complex64, a []complex64, lda int, b []complex64, ldb int, beta float32, c []complex64, ldc int)
}
// Complex128 implements the double precision complex BLAS routines.
type Complex128 interface {
Complex128Level1
Complex128Level2
Complex128Level3
}
// Complex128Level1 implements the double precision complex BLAS Level 1 routines.
type Complex128Level1 interface {
Zdotu(n int, x []complex128, incX int, y []complex128, incY int) (dotu complex128)
Zdotc(n int, x []complex128, incX int, y []complex128, incY int) (dotc complex128)
Dznrm2(n int, x []complex128, incX int) float64
Dzasum(n int, x []complex128, incX int) float64
Izamax(n int, x []complex128, incX int) int
Zswap(n int, x []complex128, incX int, y []complex128, incY int)
Zcopy(n int, x []complex128, incX int, y []complex128, incY int)
Zaxpy(n int, alpha complex128, x []complex128, incX int, y []complex128, incY int)
Zscal(n int, alpha complex128, x []complex128, incX int)
Zdscal(n int, alpha float64, x []complex128, incX int)
}
// Complex128Level2 implements the double precision complex BLAS Level 2 routines.
type Complex128Level2 interface {
Zgemv(tA Transpose, m, n int, alpha complex128, a []complex128, lda int, x []complex128, incX int, beta complex128, y []complex128, incY int)
Zgbmv(tA Transpose, m, n int, kL int, kU int, alpha complex128, a []complex128, lda int, x []complex128, incX int, beta complex128, y []complex128, incY int)
Ztrmv(ul Uplo, tA Transpose, d Diag, n int, a []complex128, lda int, x []complex128, incX int)
Ztbmv(ul Uplo, tA Transpose, d Diag, n, k int, a []complex128, lda int, x []complex128, incX int)
Ztpmv(ul Uplo, tA Transpose, d Diag, n int, ap []complex128, x []complex128, incX int)
Ztrsv(ul Uplo, tA Transpose, d Diag, n int, a []complex128, lda int, x []complex128, incX int)
Ztbsv(ul Uplo, tA Transpose, d Diag, n, k int, a []complex128, lda int, x []complex128, incX int)
Ztpsv(ul Uplo, tA Transpose, d Diag, n int, ap []complex128, x []complex128, incX int)
Zhemv(ul Uplo, n int, alpha complex128, a []complex128, lda int, x []complex128, incX int, beta complex128, y []complex128, incY int)
Zhbmv(ul Uplo, n, k int, alpha complex128, a []complex128, lda int, x []complex128, incX int, beta complex128, y []complex128, incY int)
Zhpmv(ul Uplo, n int, alpha complex128, ap []complex128, x []complex128, incX int, beta complex128, y []complex128, incY int)
Zgeru(m, n int, alpha complex128, x []complex128, incX int, y []complex128, incY int, a []complex128, lda int)
Zgerc(m, n int, alpha complex128, x []complex128, incX int, y []complex128, incY int, a []complex128, lda int)
Zher(ul Uplo, n int, alpha float64, x []complex128, incX int, a []complex128, lda int)
Zhpr(ul Uplo, n int, alpha float64, x []complex128, incX int, a []complex128)
Zher2(ul Uplo, n int, alpha complex128, x []complex128, incX int, y []complex128, incY int, a []complex128, lda int)
Zhpr2(ul Uplo, n int, alpha complex128, x []complex128, incX int, y []complex128, incY int, ap []complex128)
}
// Complex128Level3 implements the double precision complex BLAS Level 3 routines.
type Complex128Level3 interface {
Zgemm(tA, tB Transpose, m, n, k int, alpha complex128, a []complex128, lda int, b []complex128, ldb int, beta complex128, c []complex128, ldc int)
Zsymm(s Side, ul Uplo, m, n int, alpha complex128, a []complex128, lda int, b []complex128, ldb int, beta complex128, c []complex128, ldc int)
Zsyrk(ul Uplo, t Transpose, n, k int, alpha complex128, a []complex128, lda int, beta complex128, c []complex128, ldc int)
Zsyr2k(ul Uplo, t Transpose, n, k int, alpha complex128, a []complex128, lda int, b []complex128, ldb int, beta complex128, c []complex128, ldc int)
Ztrmm(s Side, ul Uplo, tA Transpose, d Diag, m, n int, alpha complex128, a []complex128, lda int, b []complex128, ldb int)
Ztrsm(s Side, ul Uplo, tA Transpose, d Diag, m, n int, alpha complex128, a []complex128, lda int, b []complex128, ldb int)
Zhemm(s Side, ul Uplo, m, n int, alpha complex128, a []complex128, lda int, b []complex128, ldb int, beta complex128, c []complex128, ldc int)
Zherk(ul Uplo, t Transpose, n, k int, alpha float64, a []complex128, lda int, beta float64, c []complex128, ldc int)
Zher2k(ul Uplo, t Transpose, n, k int, alpha complex128, a []complex128, lda int, b []complex128, ldb int, beta float64, c []complex128, ldc int)
}
@@ -0,0 +1,445 @@
// Copyright ©2015 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package blas64
import (
"gonum.org/v1/gonum/blas"
"gonum.org/v1/gonum/blas/gonum"
)
var blas64 blas.Float64 = gonum.Implementation{}
// Use sets the BLAS float64 implementation to be used by subsequent BLAS calls.
// The default implementation is native.Implementation.
func Use(b blas.Float64) {
blas64 = b
}
// Implementation returns the current BLAS float64 implementation.
//
// Implementation allows direct calls to the current the BLAS float64 implementation
// giving finer control of parameters.
func Implementation() blas.Float64 {
return blas64
}
// Vector represents a vector with an associated element increment.
type Vector struct {
Inc int
Data []float64
}
// General represents a matrix using the conventional storage scheme.
type General struct {
Rows, Cols int
Stride int
Data []float64
}
// Band represents a band matrix using the band storage scheme.
type Band struct {
Rows, Cols int
KL, KU int
Stride int
Data []float64
}
// Triangular represents a triangular matrix using the conventional storage scheme.
type Triangular struct {
N int
Stride int
Data []float64
Uplo blas.Uplo
Diag blas.Diag
}
// TriangularBand represents a triangular matrix using the band storage scheme.
type TriangularBand struct {
N, K int
Stride int
Data []float64
Uplo blas.Uplo
Diag blas.Diag
}
// TriangularPacked represents a triangular matrix using the packed storage scheme.
type TriangularPacked struct {
N int
Data []float64
Uplo blas.Uplo
Diag blas.Diag
}
// Symmetric represents a symmetric matrix using the conventional storage scheme.
type Symmetric struct {
N int
Stride int
Data []float64
Uplo blas.Uplo
}
// SymmetricBand represents a symmetric matrix using the band storage scheme.
type SymmetricBand struct {
N, K int
Stride int
Data []float64
Uplo blas.Uplo
}
// SymmetricPacked represents a symmetric matrix using the packed storage scheme.
type SymmetricPacked struct {
N int
Data []float64
Uplo blas.Uplo
}
// Level 1
const negInc = "blas64: negative vector increment"
// Dot computes the dot product of the two vectors:
// \sum_i x[i]*y[i].
func Dot(n int, x, y Vector) float64 {
return blas64.Ddot(n, x.Data, x.Inc, y.Data, y.Inc)
}
// Nrm2 computes the Euclidean norm of the vector x:
// sqrt(\sum_i x[i]*x[i]).
//
// Nrm2 will panic if the vector increment is negative.
func Nrm2(n int, x Vector) float64 {
if x.Inc < 0 {
panic(negInc)
}
return blas64.Dnrm2(n, x.Data, x.Inc)
}
// Asum computes the sum of the absolute values of the elements of x:
// \sum_i |x[i]|.
//
// Asum will panic if the vector increment is negative.
func Asum(n int, x Vector) float64 {
if x.Inc < 0 {
panic(negInc)
}
return blas64.Dasum(n, x.Data, x.Inc)
}
// Iamax returns the index of an element of x with the largest absolute value.
// If there are multiple such indices the earliest is returned.
// Iamax returns -1 if n == 0.
//
// Iamax will panic if the vector increment is negative.
func Iamax(n int, x Vector) int {
if x.Inc < 0 {
panic(negInc)
}
return blas64.Idamax(n, x.Data, x.Inc)
}
// Swap exchanges the elements of the two vectors:
// x[i], y[i] = y[i], x[i] for all i.
func Swap(n int, x, y Vector) {
blas64.Dswap(n, x.Data, x.Inc, y.Data, y.Inc)
}
// Copy copies the elements of x into the elements of y:
// y[i] = x[i] for all i.
func Copy(n int, x, y Vector) {
blas64.Dcopy(n, x.Data, x.Inc, y.Data, y.Inc)
}
// Axpy adds x scaled by alpha to y:
// y[i] += alpha*x[i] for all i.
func Axpy(n int, alpha float64, x, y Vector) {
blas64.Daxpy(n, alpha, x.Data, x.Inc, y.Data, y.Inc)
}
// Rotg computes the parameters of a Givens plane rotation so that
// ⎡ c s⎤ ⎡a⎤ ⎡r⎤
// ⎣-s c⎦ * ⎣b⎦ = ⎣0⎦
// where a and b are the Cartesian coordinates of a given point.
// c, s, and r are defined as
// r = ±Sqrt(a^2 + b^2),
// c = a/r, the cosine of the rotation angle,
// s = a/r, the sine of the rotation angle,
// and z is defined such that
// if |a| > |b|, z = s,
// otherwise if c != 0, z = 1/c,
// otherwise z = 1.
func Rotg(a, b float64) (c, s, r, z float64) {
return blas64.Drotg(a, b)
}
// Rotmg computes the modified Givens rotation. See
// http://www.netlib.org/lapack/explore-html/df/deb/drotmg_8f.html
// for more details.
func Rotmg(d1, d2, b1, b2 float64) (p blas.DrotmParams, rd1, rd2, rb1 float64) {
return blas64.Drotmg(d1, d2, b1, b2)
}
// Rot applies a plane transformation to n points represented by the vectors x
// and y:
// x[i] = c*x[i] + s*y[i],
// y[i] = -s*x[i] + c*y[i], for all i.
func Rot(n int, x, y Vector, c, s float64) {
blas64.Drot(n, x.Data, x.Inc, y.Data, y.Inc, c, s)
}
// Rotm applies the modified Givens rotation to n points represented by the
// vectors x and y.
func Rotm(n int, x, y Vector, p blas.DrotmParams) {
blas64.Drotm(n, x.Data, x.Inc, y.Data, y.Inc, p)
}
// Scal scales the vector x by alpha:
// x[i] *= alpha for all i.
//
// Scal will panic if the vector increment is negative.
func Scal(n int, alpha float64, x Vector) {
if x.Inc < 0 {
panic(negInc)
}
blas64.Dscal(n, alpha, x.Data, x.Inc)
}
// Level 2
// Gemv computes
// y = alpha * A * x + beta * y, if t == blas.NoTrans,
// y = alpha * A^T * x + beta * y, if t == blas.Trans or blas.ConjTrans,
// where A is an m×n dense matrix, x and y are vectors, and alpha and beta are scalars.
func Gemv(t blas.Transpose, alpha float64, a General, x Vector, beta float64, y Vector) {
blas64.Dgemv(t, a.Rows, a.Cols, alpha, a.Data, a.Stride, x.Data, x.Inc, beta, y.Data, y.Inc)
}
// Gbmv computes
// y = alpha * A * x + beta * y, if t == blas.NoTrans,
// y = alpha * A^T * x + beta * y, if t == blas.Trans or blas.ConjTrans,
// where A is an m×n band matrix, x and y are vectors, and alpha and beta are scalars.
func Gbmv(t blas.Transpose, alpha float64, a Band, x Vector, beta float64, y Vector) {
blas64.Dgbmv(t, a.Rows, a.Cols, a.KL, a.KU, alpha, a.Data, a.Stride, x.Data, x.Inc, beta, y.Data, y.Inc)
}
// Trmv computes
// x = A * x, if t == blas.NoTrans,
// x = A^T * x, if t == blas.Trans or blas.ConjTrans,
// where A is an n×n triangular matrix, and x is a vector.
func Trmv(t blas.Transpose, a Triangular, x Vector) {
blas64.Dtrmv(a.Uplo, t, a.Diag, a.N, a.Data, a.Stride, x.Data, x.Inc)
}
// Tbmv computes
// x = A * x, if t == blas.NoTrans,
// x = A^T * x, if t == blas.Trans or blas.ConjTrans,
// where A is an n×n triangular band matrix, and x is a vector.
func Tbmv(t blas.Transpose, a TriangularBand, x Vector) {
blas64.Dtbmv(a.Uplo, t, a.Diag, a.N, a.K, a.Data, a.Stride, x.Data, x.Inc)
}
// Tpmv computes
// x = A * x, if t == blas.NoTrans,
// x = A^T * x, if t == blas.Trans or blas.ConjTrans,
// where A is an n×n triangular matrix in packed format, and x is a vector.
func Tpmv(t blas.Transpose, a TriangularPacked, x Vector) {
blas64.Dtpmv(a.Uplo, t, a.Diag, a.N, a.Data, x.Data, x.Inc)
}
// Trsv solves
// A * x = b, if t == blas.NoTrans,
// A^T * x = b, if t == blas.Trans or blas.ConjTrans,
// where A is an n×n triangular matrix, and x and b are vectors.
//
// At entry to the function, x contains the values of b, and the result is
// stored in-place into x.
//
// No test for singularity or near-singularity is included in this
// routine. Such tests must be performed before calling this routine.
func Trsv(t blas.Transpose, a Triangular, x Vector) {
blas64.Dtrsv(a.Uplo, t, a.Diag, a.N, a.Data, a.Stride, x.Data, x.Inc)
}
// Tbsv solves
// A * x = b, if t == blas.NoTrans,
// A^T * x = b, if t == blas.Trans or blas.ConjTrans,
// where A is an n×n triangular band matrix, and x and b are vectors.
//
// At entry to the function, x contains the values of b, and the result is
// stored in place into x.
//
// No test for singularity or near-singularity is included in this
// routine. Such tests must be performed before calling this routine.
func Tbsv(t blas.Transpose, a TriangularBand, x Vector) {
blas64.Dtbsv(a.Uplo, t, a.Diag, a.N, a.K, a.Data, a.Stride, x.Data, x.Inc)
}
// Tpsv solves
// A * x = b, if t == blas.NoTrans,
// A^T * x = b, if t == blas.Trans or blas.ConjTrans,
// where A is an n×n triangular matrix in packed format, and x and b are
// vectors.
//
// At entry to the function, x contains the values of b, and the result is
// stored in place into x.
//
// No test for singularity or near-singularity is included in this
// routine. Such tests must be performed before calling this routine.
func Tpsv(t blas.Transpose, a TriangularPacked, x Vector) {
blas64.Dtpsv(a.Uplo, t, a.Diag, a.N, a.Data, x.Data, x.Inc)
}
// Symv computes
// y = alpha * A * x + beta * y,
// where A is an n×n symmetric matrix, x and y are vectors, and alpha and
// beta are scalars.
func Symv(alpha float64, a Symmetric, x Vector, beta float64, y Vector) {
blas64.Dsymv(a.Uplo, a.N, alpha, a.Data, a.Stride, x.Data, x.Inc, beta, y.Data, y.Inc)
}
// Sbmv performs
// y = alpha * A * x + beta * y,
// where A is an n×n symmetric band matrix, x and y are vectors, and alpha
// and beta are scalars.
func Sbmv(alpha float64, a SymmetricBand, x Vector, beta float64, y Vector) {
blas64.Dsbmv(a.Uplo, a.N, a.K, alpha, a.Data, a.Stride, x.Data, x.Inc, beta, y.Data, y.Inc)
}
// Spmv performs
// y = alpha * A * x + beta * y,
// where A is an n×n symmetric matrix in packed format, x and y are vectors,
// and alpha and beta are scalars.
func Spmv(alpha float64, a SymmetricPacked, x Vector, beta float64, y Vector) {
blas64.Dspmv(a.Uplo, a.N, alpha, a.Data, x.Data, x.Inc, beta, y.Data, y.Inc)
}
// Ger performs a rank-1 update
// A += alpha * x * y^T,
// where A is an m×n dense matrix, x and y are vectors, and alpha is a scalar.
func Ger(alpha float64, x, y Vector, a General) {
blas64.Dger(a.Rows, a.Cols, alpha, x.Data, x.Inc, y.Data, y.Inc, a.Data, a.Stride)
}
// Syr performs a rank-1 update
// A += alpha * x * x^T,
// where A is an n×n symmetric matrix, x is a vector, and alpha is a scalar.
func Syr(alpha float64, x Vector, a Symmetric) {
blas64.Dsyr(a.Uplo, a.N, alpha, x.Data, x.Inc, a.Data, a.Stride)
}
// Spr performs the rank-1 update
// A += alpha * x * x^T,
// where A is an n×n symmetric matrix in packed format, x is a vector, and
// alpha is a scalar.
func Spr(alpha float64, x Vector, a SymmetricPacked) {
blas64.Dspr(a.Uplo, a.N, alpha, x.Data, x.Inc, a.Data)
}
// Syr2 performs a rank-2 update
// A += alpha * x * y^T + alpha * y * x^T,
// where A is a symmetric n×n matrix, x and y are vectors, and alpha is a scalar.
func Syr2(alpha float64, x, y Vector, a Symmetric) {
blas64.Dsyr2(a.Uplo, a.N, alpha, x.Data, x.Inc, y.Data, y.Inc, a.Data, a.Stride)
}
// Spr2 performs a rank-2 update
// A += alpha * x * y^T + alpha * y * x^T,
// where A is an n×n symmetric matrix in packed format, x and y are vectors,
// and alpha is a scalar.
func Spr2(alpha float64, x, y Vector, a SymmetricPacked) {
blas64.Dspr2(a.Uplo, a.N, alpha, x.Data, x.Inc, y.Data, y.Inc, a.Data)
}
// Level 3
// Gemm computes
// C = alpha * A * B + beta * C,
// where A, B, and C are dense matrices, and alpha and beta are scalars.
// tA and tB specify whether A or B are transposed.
func Gemm(tA, tB blas.Transpose, alpha float64, a, b General, beta float64, c General) {
var m, n, k int
if tA == blas.NoTrans {
m, k = a.Rows, a.Cols
} else {
m, k = a.Cols, a.Rows
}
if tB == blas.NoTrans {
n = b.Cols
} else {
n = b.Rows
}
blas64.Dgemm(tA, tB, m, n, k, alpha, a.Data, a.Stride, b.Data, b.Stride, beta, c.Data, c.Stride)
}
// Symm performs
// C = alpha * A * B + beta * C, if s == blas.Left,
// C = alpha * B * A + beta * C, if s == blas.Right,
// where A is an n×n or m×m symmetric matrix, B and C are m×n matrices, and
// alpha is a scalar.
func Symm(s blas.Side, alpha float64, a Symmetric, b General, beta float64, c General) {
var m, n int
if s == blas.Left {
m, n = a.N, b.Cols
} else {
m, n = b.Rows, a.N
}
blas64.Dsymm(s, a.Uplo, m, n, alpha, a.Data, a.Stride, b.Data, b.Stride, beta, c.Data, c.Stride)
}
// Syrk performs a symmetric rank-k update
// C = alpha * A * A^T + beta * C, if t == blas.NoTrans,
// C = alpha * A^T * A + beta * C, if t == blas.Trans or blas.ConjTrans,
// where C is an n×n symmetric matrix, A is an n×k matrix if t == blas.NoTrans and
// a k×n matrix otherwise, and alpha and beta are scalars.
func Syrk(t blas.Transpose, alpha float64, a General, beta float64, c Symmetric) {
var n, k int
if t == blas.NoTrans {
n, k = a.Rows, a.Cols
} else {
n, k = a.Cols, a.Rows
}
blas64.Dsyrk(c.Uplo, t, n, k, alpha, a.Data, a.Stride, beta, c.Data, c.Stride)
}
// Syr2k performs a symmetric rank-2k update
// C = alpha * A * B^T + alpha * B * A^T + beta * C, if t == blas.NoTrans,
// C = alpha * A^T * B + alpha * B^T * A + beta * C, if t == blas.Trans or blas.ConjTrans,
// where C is an n×n symmetric matrix, A and B are n×k matrices if t == NoTrans
// and k×n matrices otherwise, and alpha and beta are scalars.
func Syr2k(t blas.Transpose, alpha float64, a, b General, beta float64, c Symmetric) {
var n, k int
if t == blas.NoTrans {
n, k = a.Rows, a.Cols
} else {
n, k = a.Cols, a.Rows
}
blas64.Dsyr2k(c.Uplo, t, n, k, alpha, a.Data, a.Stride, b.Data, b.Stride, beta, c.Data, c.Stride)
}
// Trmm performs
// B = alpha * A * B, if tA == blas.NoTrans and s == blas.Left,
// B = alpha * A^T * B, if tA == blas.Trans or blas.ConjTrans, and s == blas.Left,
// B = alpha * B * A, if tA == blas.NoTrans and s == blas.Right,
// B = alpha * B * A^T, if tA == blas.Trans or blas.ConjTrans, and s == blas.Right,
// where A is an n×n or m×m triangular matrix, B is an m×n matrix, and alpha is
// a scalar.
func Trmm(s blas.Side, tA blas.Transpose, alpha float64, a Triangular, b General) {
blas64.Dtrmm(s, a.Uplo, tA, a.Diag, b.Rows, b.Cols, alpha, a.Data, a.Stride, b.Data, b.Stride)
}
// Trsm solves
// A * X = alpha * B, if tA == blas.NoTrans and s == blas.Left,
// A^T * X = alpha * B, if tA == blas.Trans or blas.ConjTrans, and s == blas.Left,
// X * A = alpha * B, if tA == blas.NoTrans and s == blas.Right,
// X * A^T = alpha * B, if tA == blas.Trans or blas.ConjTrans, and s == blas.Right,
// where A is an n×n or m×m triangular matrix, X and B are m×n matrices, and
// alpha is a scalar.
//
// At entry to the function, X contains the values of B, and the result is
// stored in-place into X.
//
// No check is made that A is invertible.
func Trsm(s blas.Side, tA blas.Transpose, alpha float64, a Triangular, b General) {
blas64.Dtrsm(s, a.Uplo, tA, a.Diag, b.Rows, b.Cols, alpha, a.Data, a.Stride, b.Data, b.Stride)
}
@@ -0,0 +1,277 @@
// Copyright ©2015 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package blas64
import "gonum.org/v1/gonum/blas"
// GeneralCols represents a matrix using the conventional column-major storage scheme.
type GeneralCols General
// From fills the receiver with elements from a. The receiver
// must have the same dimensions as a and have adequate backing
// data storage.
func (t GeneralCols) From(a General) {
if t.Rows != a.Rows || t.Cols != a.Cols {
panic("blas64: mismatched dimension")
}
if len(t.Data) < (t.Cols-1)*t.Stride+t.Rows {
panic("blas64: short data slice")
}
for i := 0; i < a.Rows; i++ {
for j, v := range a.Data[i*a.Stride : i*a.Stride+a.Cols] {
t.Data[i+j*t.Stride] = v
}
}
}
// From fills the receiver with elements from a. The receiver
// must have the same dimensions as a and have adequate backing
// data storage.
func (t General) From(a GeneralCols) {
if t.Rows != a.Rows || t.Cols != a.Cols {
panic("blas64: mismatched dimension")
}
if len(t.Data) < (t.Rows-1)*t.Stride+t.Cols {
panic("blas64: short data slice")
}
for j := 0; j < a.Cols; j++ {
for i, v := range a.Data[j*a.Stride : j*a.Stride+a.Rows] {
t.Data[i*t.Stride+j] = v
}
}
}
// TriangularCols represents a matrix using the conventional column-major storage scheme.
type TriangularCols Triangular
// From fills the receiver with elements from a. The receiver
// must have the same dimensions, uplo and diag as a and have
// adequate backing data storage.
func (t TriangularCols) From(a Triangular) {
if t.N != a.N {
panic("blas64: mismatched dimension")
}
if t.Uplo != a.Uplo {
panic("blas64: mismatched BLAS uplo")
}
if t.Diag != a.Diag {
panic("blas64: mismatched BLAS diag")
}
switch a.Uplo {
default:
panic("blas64: bad BLAS uplo")
case blas.Upper:
for i := 0; i < a.N; i++ {
for j := i; j < a.N; j++ {
t.Data[i+j*t.Stride] = a.Data[i*a.Stride+j]
}
}
case blas.Lower:
for i := 0; i < a.N; i++ {
for j := 0; j <= i; j++ {
t.Data[i+j*t.Stride] = a.Data[i*a.Stride+j]
}
}
case blas.All:
for i := 0; i < a.N; i++ {
for j := 0; j < a.N; j++ {
t.Data[i+j*t.Stride] = a.Data[i*a.Stride+j]
}
}
}
}
// From fills the receiver with elements from a. The receiver
// must have the same dimensions, uplo and diag as a and have
// adequate backing data storage.
func (t Triangular) From(a TriangularCols) {
if t.N != a.N {
panic("blas64: mismatched dimension")
}
if t.Uplo != a.Uplo {
panic("blas64: mismatched BLAS uplo")
}
if t.Diag != a.Diag {
panic("blas64: mismatched BLAS diag")
}
switch a.Uplo {
default:
panic("blas64: bad BLAS uplo")
case blas.Upper:
for i := 0; i < a.N; i++ {
for j := i; j < a.N; j++ {
t.Data[i*t.Stride+j] = a.Data[i+j*a.Stride]
}
}
case blas.Lower:
for i := 0; i < a.N; i++ {
for j := 0; j <= i; j++ {
t.Data[i*t.Stride+j] = a.Data[i+j*a.Stride]
}
}
case blas.All:
for i := 0; i < a.N; i++ {
for j := 0; j < a.N; j++ {
t.Data[i*t.Stride+j] = a.Data[i+j*a.Stride]
}
}
}
}
// BandCols represents a matrix using the band column-major storage scheme.
type BandCols Band
// From fills the receiver with elements from a. The receiver
// must have the same dimensions and bandwidth as a and have
// adequate backing data storage.
func (t BandCols) From(a Band) {
if t.Rows != a.Rows || t.Cols != a.Cols {
panic("blas64: mismatched dimension")
}
if t.KL != a.KL || t.KU != a.KU {
panic("blas64: mismatched bandwidth")
}
if a.Stride < a.KL+a.KU+1 {
panic("blas64: short stride for source")
}
if t.Stride < t.KL+t.KU+1 {
panic("blas64: short stride for destination")
}
for i := 0; i < a.Rows; i++ {
for j := max(0, i-a.KL); j < min(i+a.KU+1, a.Cols); j++ {
t.Data[i+t.KU-j+j*t.Stride] = a.Data[j+a.KL-i+i*a.Stride]
}
}
}
// From fills the receiver with elements from a. The receiver
// must have the same dimensions and bandwidth as a and have
// adequate backing data storage.
func (t Band) From(a BandCols) {
if t.Rows != a.Rows || t.Cols != a.Cols {
panic("blas64: mismatched dimension")
}
if t.KL != a.KL || t.KU != a.KU {
panic("blas64: mismatched bandwidth")
}
if a.Stride < a.KL+a.KU+1 {
panic("blas64: short stride for source")
}
if t.Stride < t.KL+t.KU+1 {
panic("blas64: short stride for destination")
}
for j := 0; j < a.Cols; j++ {
for i := max(0, j-a.KU); i < min(j+a.KL+1, a.Rows); i++ {
t.Data[j+a.KL-i+i*a.Stride] = a.Data[i+t.KU-j+j*t.Stride]
}
}
}
// TriangularBandCols represents a symmetric matrix using the band column-major storage scheme.
type TriangularBandCols TriangularBand
// From fills the receiver with elements from a. The receiver
// must have the same dimensions, bandwidth and uplo as a and
// have adequate backing data storage.
func (t TriangularBandCols) From(a TriangularBand) {
if t.N != a.N {
panic("blas64: mismatched dimension")
}
if t.K != a.K {
panic("blas64: mismatched bandwidth")
}
if a.Stride < a.K+1 {
panic("blas64: short stride for source")
}
if t.Stride < t.K+1 {
panic("blas64: short stride for destination")
}
if t.Uplo != a.Uplo {
panic("blas64: mismatched BLAS uplo")
}
if t.Diag != a.Diag {
panic("blas64: mismatched BLAS diag")
}
dst := BandCols{
Rows: t.N, Cols: t.N,
Stride: t.Stride,
Data: t.Data,
}
src := Band{
Rows: a.N, Cols: a.N,
Stride: a.Stride,
Data: a.Data,
}
switch a.Uplo {
default:
panic("blas64: bad BLAS uplo")
case blas.Upper:
dst.KU = t.K
src.KU = a.K
case blas.Lower:
dst.KL = t.K
src.KL = a.K
}
dst.From(src)
}
// From fills the receiver with elements from a. The receiver
// must have the same dimensions, bandwidth and uplo as a and
// have adequate backing data storage.
func (t TriangularBand) From(a TriangularBandCols) {
if t.N != a.N {
panic("blas64: mismatched dimension")
}
if t.K != a.K {
panic("blas64: mismatched bandwidth")
}
if a.Stride < a.K+1 {
panic("blas64: short stride for source")
}
if t.Stride < t.K+1 {
panic("blas64: short stride for destination")
}
if t.Uplo != a.Uplo {
panic("blas64: mismatched BLAS uplo")
}
if t.Diag != a.Diag {
panic("blas64: mismatched BLAS diag")
}
dst := Band{
Rows: t.N, Cols: t.N,
Stride: t.Stride,
Data: t.Data,
}
src := BandCols{
Rows: a.N, Cols: a.N,
Stride: a.Stride,
Data: a.Data,
}
switch a.Uplo {
default:
panic("blas64: bad BLAS uplo")
case blas.Upper:
dst.KU = t.K
src.KU = a.K
case blas.Lower:
dst.KL = t.K
src.KL = a.K
}
dst.From(src)
}
func min(a, b int) int {
if a < b {
return a
}
return b
}
func max(a, b int) int {
if a > b {
return a
}
return b
}
@@ -0,0 +1,153 @@
// Copyright ©2015 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package blas64
import "gonum.org/v1/gonum/blas"
// SymmetricCols represents a matrix using the conventional column-major storage scheme.
type SymmetricCols Symmetric
// From fills the receiver with elements from a. The receiver
// must have the same dimensions and uplo as a and have adequate
// backing data storage.
func (t SymmetricCols) From(a Symmetric) {
if t.N != a.N {
panic("blas64: mismatched dimension")
}
if t.Uplo != a.Uplo {
panic("blas64: mismatched BLAS uplo")
}
switch a.Uplo {
default:
panic("blas64: bad BLAS uplo")
case blas.Upper:
for i := 0; i < a.N; i++ {
for j := i; j < a.N; j++ {
t.Data[i+j*t.Stride] = a.Data[i*a.Stride+j]
}
}
case blas.Lower:
for i := 0; i < a.N; i++ {
for j := 0; j <= i; j++ {
t.Data[i+j*t.Stride] = a.Data[i*a.Stride+j]
}
}
}
}
// From fills the receiver with elements from a. The receiver
// must have the same dimensions and uplo as a and have adequate
// backing data storage.
func (t Symmetric) From(a SymmetricCols) {
if t.N != a.N {
panic("blas64: mismatched dimension")
}
if t.Uplo != a.Uplo {
panic("blas64: mismatched BLAS uplo")
}
switch a.Uplo {
default:
panic("blas64: bad BLAS uplo")
case blas.Upper:
for i := 0; i < a.N; i++ {
for j := i; j < a.N; j++ {
t.Data[i*t.Stride+j] = a.Data[i+j*a.Stride]
}
}
case blas.Lower:
for i := 0; i < a.N; i++ {
for j := 0; j <= i; j++ {
t.Data[i*t.Stride+j] = a.Data[i+j*a.Stride]
}
}
}
}
// SymmetricBandCols represents a symmetric matrix using the band column-major storage scheme.
type SymmetricBandCols SymmetricBand
// From fills the receiver with elements from a. The receiver
// must have the same dimensions, bandwidth and uplo as a and
// have adequate backing data storage.
func (t SymmetricBandCols) From(a SymmetricBand) {
if t.N != a.N {
panic("blas64: mismatched dimension")
}
if t.K != a.K {
panic("blas64: mismatched bandwidth")
}
if a.Stride < a.K+1 {
panic("blas64: short stride for source")
}
if t.Stride < t.K+1 {
panic("blas64: short stride for destination")
}
if t.Uplo != a.Uplo {
panic("blas64: mismatched BLAS uplo")
}
dst := BandCols{
Rows: t.N, Cols: t.N,
Stride: t.Stride,
Data: t.Data,
}
src := Band{
Rows: a.N, Cols: a.N,
Stride: a.Stride,
Data: a.Data,
}
switch a.Uplo {
default:
panic("blas64: bad BLAS uplo")
case blas.Upper:
dst.KU = t.K
src.KU = a.K
case blas.Lower:
dst.KL = t.K
src.KL = a.K
}
dst.From(src)
}
// From fills the receiver with elements from a. The receiver
// must have the same dimensions, bandwidth and uplo as a and
// have adequate backing data storage.
func (t SymmetricBand) From(a SymmetricBandCols) {
if t.N != a.N {
panic("blas64: mismatched dimension")
}
if t.K != a.K {
panic("blas64: mismatched bandwidth")
}
if a.Stride < a.K+1 {
panic("blas64: short stride for source")
}
if t.Stride < t.K+1 {
panic("blas64: short stride for destination")
}
if t.Uplo != a.Uplo {
panic("blas64: mismatched BLAS uplo")
}
dst := Band{
Rows: t.N, Cols: t.N,
Stride: t.Stride,
Data: t.Data,
}
src := BandCols{
Rows: a.N, Cols: a.N,
Stride: a.Stride,
Data: a.Data,
}
switch a.Uplo {
default:
panic("blas64: bad BLAS uplo")
case blas.Upper:
dst.KU = t.K
src.KU = a.K
case blas.Lower:
dst.KL = t.K
src.KL = a.K
}
dst.From(src)
}
@@ -0,0 +1,6 @@
// Copyright ©2017 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package blas64 provides a simple interface to the float64 BLAS API.
package blas64
+159
View File
@@ -0,0 +1,159 @@
#!/usr/bin/env bash
# Copyright ©2017 The Gonum Authors. All rights reserved.
# Use of this source code is governed by a BSD-style
# license that can be found in the LICENSE file.
# Generate code for blas32.
echo Generating blas32/conv.go
echo -e '// Code generated by "go generate gonum.org/v1/gonum/blas”; DO NOT EDIT.\n' > blas32/conv.go
cat blas64/conv.go \
| gofmt -r 'float64 -> float32' \
\
| sed -e 's/blas64/blas32/' \
\
>> blas32/conv.go
echo Generating blas32/conv_test.go
echo -e '// Code generated by "go generate gonum.org/v1/gonum/blas”; DO NOT EDIT.\n' > blas32/conv_test.go
cat blas64/conv_test.go \
| gofmt -r 'float64 -> float32' \
\
| sed -e 's/blas64/blas32/' \
-e 's_"math"_math "gonum.org/v1/gonum/internal/math32"_' \
\
>> blas32/conv_test.go
echo Generating blas32/conv_symmetric.go
echo -e '// Code generated by "go generate gonum.org/v1/gonum/blas”; DO NOT EDIT.\n' > blas32/conv_symmetric.go
cat blas64/conv_symmetric.go \
| gofmt -r 'float64 -> float32' \
\
| sed -e 's/blas64/blas32/' \
\
>> blas32/conv_symmetric.go
echo Generating blas32/conv_symmetric_test.go
echo -e '// Code generated by "go generate gonum.org/v1/gonum/blas”; DO NOT EDIT.\n' > blas32/conv_symmetric_test.go
cat blas64/conv_symmetric_test.go \
| gofmt -r 'float64 -> float32' \
\
| sed -e 's/blas64/blas32/' \
-e 's_"math"_math "gonum.org/v1/gonum/internal/math32"_' \
\
>> blas32/conv_symmetric_test.go
# Generate code for cblas128.
echo Generating cblas128/conv.go
echo -e '// Code generated by "go generate gonum.org/v1/gonum/blas”; DO NOT EDIT.\n' > cblas128/conv.go
cat blas64/conv.go \
| gofmt -r 'float64 -> complex128' \
\
| sed -e 's/blas64/cblas128/' \
\
>> cblas128/conv.go
echo Generating cblas128/conv_test.go
echo -e '// Code generated by "go generate gonum.org/v1/gonum/blas”; DO NOT EDIT.\n' > cblas128/conv_test.go
cat blas64/conv_test.go \
| gofmt -r 'float64 -> complex128' \
\
| sed -e 's/blas64/cblas128/' \
-e 's_"math"_math "math/cmplx"_' \
\
>> cblas128/conv_test.go
echo Generating cblas128/conv_symmetric.go
echo -e '// Code generated by "go generate gonum.org/v1/gonum/blas”; DO NOT EDIT.\n' > cblas128/conv_symmetric.go
cat blas64/conv_symmetric.go \
| gofmt -r 'float64 -> complex128' \
\
| sed -e 's/blas64/cblas128/' \
\
>> cblas128/conv_symmetric.go
echo Generating cblas128/conv_symmetric_test.go
echo -e '// Code generated by "go generate gonum.org/v1/gonum/blas”; DO NOT EDIT.\n' > cblas128/conv_symmetric_test.go
cat blas64/conv_symmetric_test.go \
| gofmt -r 'float64 -> complex128' \
\
| sed -e 's/blas64/cblas128/' \
-e 's_"math"_math "math/cmplx"_' \
\
>> cblas128/conv_symmetric_test.go
echo Generating cblas128/conv_hermitian.go
echo -e '// Code generated by "go generate gonum.org/v1/gonum/blas”; DO NOT EDIT.\n' > cblas128/conv_hermitian.go
cat blas64/conv_symmetric.go \
| gofmt -r 'float64 -> complex128' \
\
| sed -e 's/blas64/cblas128/' \
-e 's/Symmetric/Hermitian/g' \
-e 's/a symmetric/an Hermitian/g' \
-e 's/symmetric/hermitian/g' \
-e 's/Sym/Herm/g' \
\
>> cblas128/conv_hermitian.go
echo Generating cblas128/conv_hermitian_test.go
echo -e '// Code generated by "go generate gonum.org/v1/gonum/blas”; DO NOT EDIT.\n' > cblas128/conv_hermitian_test.go
cat blas64/conv_symmetric_test.go \
| gofmt -r 'float64 -> complex128' \
\
| sed -e 's/blas64/cblas128/' \
-e 's/Symmetric/Hermitian/g' \
-e 's/a symmetric/an Hermitian/g' \
-e 's/symmetric/hermitian/g' \
-e 's/Sym/Herm/g' \
-e 's_"math"_math "math/cmplx"_' \
\
>> cblas128/conv_hermitian_test.go
# Generate code for cblas64.
echo Generating cblas64/conv.go
echo -e '// Code generated by "go generate gonum.org/v1/gonum/blas”; DO NOT EDIT.\n' > cblas64/conv.go
cat blas64/conv.go \
| gofmt -r 'float64 -> complex64' \
\
| sed -e 's/blas64/cblas64/' \
\
>> cblas64/conv.go
echo Generating cblas64/conv_test.go
echo -e '// Code generated by "go generate gonum.org/v1/gonum/blas”; DO NOT EDIT.\n' > cblas64/conv_test.go
cat blas64/conv_test.go \
| gofmt -r 'float64 -> complex64' \
\
| sed -e 's/blas64/cblas64/' \
-e 's_"math"_math "gonum.org/v1/gonum/internal/cmplx64"_' \
\
>> cblas64/conv_test.go
echo Generating cblas64/conv_hermitian.go
echo -e '// Code generated by "go generate gonum.org/v1/gonum/blas”; DO NOT EDIT.\n' > cblas64/conv_hermitian.go
cat blas64/conv_symmetric.go \
| gofmt -r 'float64 -> complex64' \
\
| sed -e 's/blas64/cblas64/' \
-e 's/Symmetric/Hermitian/g' \
-e 's/a symmetric/an Hermitian/g' \
-e 's/symmetric/hermitian/g' \
-e 's/Sym/Herm/g' \
\
>> cblas64/conv_hermitian.go
echo Generating cblas64/conv_hermitian_test.go
echo -e '// Code generated by "go generate gonum.org/v1/gonum/blas”; DO NOT EDIT.\n' > cblas64/conv_hermitian_test.go
cat blas64/conv_symmetric_test.go \
| gofmt -r 'float64 -> complex64' \
\
| sed -e 's/blas64/cblas64/' \
-e 's/Symmetric/Hermitian/g' \
-e 's/a symmetric/an Hermitian/g' \
-e 's/symmetric/hermitian/g' \
-e 's/Sym/Herm/g' \
-e 's_"math"_math "gonum.org/v1/gonum/internal/cmplx64"_' \
\
>> cblas64/conv_hermitian_test.go
+108
View File
@@ -0,0 +1,108 @@
// Copyright ©2017 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
/*
Package blas provides interfaces for the BLAS linear algebra standard.
All methods must perform appropriate parameter checking and panic if
provided parameters that do not conform to the requirements specified
by the BLAS standard.
Quick Reference Guide to the BLAS from http://www.netlib.org/lapack/lug/node145.html
This version is modified to remove the "order" option. All matrix operations are
on row-order matrices.
Level 1 BLAS
dim scalar vector vector scalars 5-element prefixes
struct
_rotg ( a, b ) S, D
_rotmg( d1, d2, a, b ) S, D
_rot ( n, x, incX, y, incY, c, s ) S, D
_rotm ( n, x, incX, y, incY, param ) S, D
_swap ( n, x, incX, y, incY ) S, D, C, Z
_scal ( n, alpha, x, incX ) S, D, C, Z, Cs, Zd
_copy ( n, x, incX, y, incY ) S, D, C, Z
_axpy ( n, alpha, x, incX, y, incY ) S, D, C, Z
_dot ( n, x, incX, y, incY ) S, D, Ds
_dotu ( n, x, incX, y, incY ) C, Z
_dotc ( n, x, incX, y, incY ) C, Z
__dot ( n, alpha, x, incX, y, incY ) Sds
_nrm2 ( n, x, incX ) S, D, Sc, Dz
_asum ( n, x, incX ) S, D, Sc, Dz
I_amax( n, x, incX ) s, d, c, z
Level 2 BLAS
options dim b-width scalar matrix vector scalar vector prefixes
_gemv ( trans, m, n, alpha, a, lda, x, incX, beta, y, incY ) S, D, C, Z
_gbmv ( trans, m, n, kL, kU, alpha, a, lda, x, incX, beta, y, incY ) S, D, C, Z
_hemv ( uplo, n, alpha, a, lda, x, incX, beta, y, incY ) C, Z
_hbmv ( uplo, n, k, alpha, a, lda, x, incX, beta, y, incY ) C, Z
_hpmv ( uplo, n, alpha, ap, x, incX, beta, y, incY ) C, Z
_symv ( uplo, n, alpha, a, lda, x, incX, beta, y, incY ) S, D
_sbmv ( uplo, n, k, alpha, a, lda, x, incX, beta, y, incY ) S, D
_spmv ( uplo, n, alpha, ap, x, incX, beta, y, incY ) S, D
_trmv ( uplo, trans, diag, n, a, lda, x, incX ) S, D, C, Z
_tbmv ( uplo, trans, diag, n, k, a, lda, x, incX ) S, D, C, Z
_tpmv ( uplo, trans, diag, n, ap, x, incX ) S, D, C, Z
_trsv ( uplo, trans, diag, n, a, lda, x, incX ) S, D, C, Z
_tbsv ( uplo, trans, diag, n, k, a, lda, x, incX ) S, D, C, Z
_tpsv ( uplo, trans, diag, n, ap, x, incX ) S, D, C, Z
options dim scalar vector vector matrix prefixes
_ger ( m, n, alpha, x, incX, y, incY, a, lda ) S, D
_geru ( m, n, alpha, x, incX, y, incY, a, lda ) C, Z
_gerc ( m, n, alpha, x, incX, y, incY, a, lda ) C, Z
_her ( uplo, n, alpha, x, incX, a, lda ) C, Z
_hpr ( uplo, n, alpha, x, incX, ap ) C, Z
_her2 ( uplo, n, alpha, x, incX, y, incY, a, lda ) C, Z
_hpr2 ( uplo, n, alpha, x, incX, y, incY, ap ) C, Z
_syr ( uplo, n, alpha, x, incX, a, lda ) S, D
_spr ( uplo, n, alpha, x, incX, ap ) S, D
_syr2 ( uplo, n, alpha, x, incX, y, incY, a, lda ) S, D
_spr2 ( uplo, n, alpha, x, incX, y, incY, ap ) S, D
Level 3 BLAS
options dim scalar matrix matrix scalar matrix prefixes
_gemm ( transA, transB, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc ) S, D, C, Z
_symm ( side, uplo, m, n, alpha, a, lda, b, ldb, beta, c, ldc ) S, D, C, Z
_hemm ( side, uplo, m, n, alpha, a, lda, b, ldb, beta, c, ldc ) C, Z
_syrk ( uplo, trans, n, k, alpha, a, lda, beta, c, ldc ) S, D, C, Z
_herk ( uplo, trans, n, k, alpha, a, lda, beta, c, ldc ) C, Z
_syr2k( uplo, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc ) S, D, C, Z
_her2k( uplo, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc ) C, Z
_trmm ( side, uplo, transA, diag, m, n, alpha, a, lda, b, ldb ) S, D, C, Z
_trsm ( side, uplo, transA, diag, m, n, alpha, a, lda, b, ldb ) S, D, C, Z
Meaning of prefixes
S - float32 C - complex64
D - float64 Z - complex128
Matrix types
GE - GEneral GB - General Band
SY - SYmmetric SB - Symmetric Band SP - Symmetric Packed
HE - HErmitian HB - Hermitian Band HP - Hermitian Packed
TR - TRiangular TB - Triangular Band TP - Triangular Packed
Options
trans = NoTrans, Trans, ConjTrans
uplo = Upper, Lower
diag = Nonunit, Unit
side = Left, Right (A or op(A) on the left, or A or op(A) on the right)
For real matrices, Trans and ConjTrans have the same meaning.
For Hermitian matrices, trans = Trans is not allowed.
For complex symmetric matrices, trans = ConjTrans is not allowed.
*/
package blas
@@ -0,0 +1,164 @@
// Copyright ©2017 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package gonum
import "gonum.org/v1/gonum/blas"
var (
_ blas.Complex64 = Implementation{}
_ blas.Complex128 = Implementation{}
)
// TODO(btracey): Replace this as complex routines are added, and instead
// automatically generate the complex64 routines from the complex128 ones.
var noComplex = "native: implementation does not implement this routine, see the cgo wrapper in gonum.org/v1/netlib/blas"
// Level 1 complex64 routines.
func (Implementation) Cdotu(n int, x []complex64, incX int, y []complex64, incY int) (dotu complex64) {
panic(noComplex)
}
func (Implementation) Cdotc(n int, x []complex64, incX int, y []complex64, incY int) (dotc complex64) {
panic(noComplex)
}
func (Implementation) Scnrm2(n int, x []complex64, incX int) float32 {
panic(noComplex)
}
func (Implementation) Scasum(n int, x []complex64, incX int) float32 {
panic(noComplex)
}
func (Implementation) Icamax(n int, x []complex64, incX int) int {
panic(noComplex)
}
func (Implementation) Cswap(n int, x []complex64, incX int, y []complex64, incY int) {
panic(noComplex)
}
func (Implementation) Ccopy(n int, x []complex64, incX int, y []complex64, incY int) {
panic(noComplex)
}
func (Implementation) Caxpy(n int, alpha complex64, x []complex64, incX int, y []complex64, incY int) {
panic(noComplex)
}
func (Implementation) Cscal(n int, alpha complex64, x []complex64, incX int) {
panic(noComplex)
}
func (Implementation) Csscal(n int, alpha float32, x []complex64, incX int) {
panic(noComplex)
}
// Level 2 complex64 routines.
func (Implementation) Cgemv(tA blas.Transpose, m, n int, alpha complex64, a []complex64, lda int, x []complex64, incX int, beta complex64, y []complex64, incY int) {
panic(noComplex)
}
func (Implementation) Cgbmv(tA blas.Transpose, m, n, kL, kU int, alpha complex64, a []complex64, lda int, x []complex64, incX int, beta complex64, y []complex64, incY int) {
panic(noComplex)
}
func (Implementation) Ctrmv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n int, a []complex64, lda int, x []complex64, incX int) {
panic(noComplex)
}
func (Implementation) Ctbmv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n, k int, a []complex64, lda int, x []complex64, incX int) {
panic(noComplex)
}
func (Implementation) Ctpmv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n int, ap []complex64, x []complex64, incX int) {
panic(noComplex)
}
func (Implementation) Ctrsv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n int, a []complex64, lda int, x []complex64, incX int) {
panic(noComplex)
}
func (Implementation) Ctbsv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n, k int, a []complex64, lda int, x []complex64, incX int) {
panic(noComplex)
}
func (Implementation) Ctpsv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n int, ap []complex64, x []complex64, incX int) {
panic(noComplex)
}
func (Implementation) Chemv(ul blas.Uplo, n int, alpha complex64, a []complex64, lda int, x []complex64, incX int, beta complex64, y []complex64, incY int) {
panic(noComplex)
}
func (Implementation) Chbmv(ul blas.Uplo, n, k int, alpha complex64, a []complex64, lda int, x []complex64, incX int, beta complex64, y []complex64, incY int) {
panic(noComplex)
}
func (Implementation) Chpmv(ul blas.Uplo, n int, alpha complex64, ap []complex64, x []complex64, incX int, beta complex64, y []complex64, incY int) {
panic(noComplex)
}
func (Implementation) Cgeru(m, n int, alpha complex64, x []complex64, incX int, y []complex64, incY int, a []complex64, lda int) {
panic(noComplex)
}
func (Implementation) Cgerc(m, n int, alpha complex64, x []complex64, incX int, y []complex64, incY int, a []complex64, lda int) {
panic(noComplex)
}
func (Implementation) Cher(ul blas.Uplo, n int, alpha float32, x []complex64, incX int, a []complex64, lda int) {
panic(noComplex)
}
func (Implementation) Chpr(ul blas.Uplo, n int, alpha float32, x []complex64, incX int, a []complex64) {
panic(noComplex)
}
func (Implementation) Cher2(ul blas.Uplo, n int, alpha complex64, x []complex64, incX int, y []complex64, incY int, a []complex64, lda int) {
panic(noComplex)
}
func (Implementation) Chpr2(ul blas.Uplo, n int, alpha complex64, x []complex64, incX int, y []complex64, incY int, ap []complex64) {
panic(noComplex)
}
// Level 3 complex64 routines.
func (Implementation) Cgemm(tA, tB blas.Transpose, m, n, k int, alpha complex64, a []complex64, lda int, b []complex64, ldb int, beta complex64, c []complex64, ldc int) {
panic(noComplex)
}
func (Implementation) Csymm(s blas.Side, ul blas.Uplo, m, n int, alpha complex64, a []complex64, lda int, b []complex64, ldb int, beta complex64, c []complex64, ldc int) {
panic(noComplex)
}
func (Implementation) Csyrk(ul blas.Uplo, t blas.Transpose, n, k int, alpha complex64, a []complex64, lda int, beta complex64, c []complex64, ldc int) {
panic(noComplex)
}
func (Implementation) Csyr2k(ul blas.Uplo, t blas.Transpose, n, k int, alpha complex64, a []complex64, lda int, b []complex64, ldb int, beta complex64, c []complex64, ldc int) {
panic(noComplex)
}
func (Implementation) Ctrmm(s blas.Side, ul blas.Uplo, tA blas.Transpose, d blas.Diag, m, n int, alpha complex64, a []complex64, lda int, b []complex64, ldb int) {
panic(noComplex)
}
func (Implementation) Ctrsm(s blas.Side, ul blas.Uplo, tA blas.Transpose, d blas.Diag, m, n int, alpha complex64, a []complex64, lda int, b []complex64, ldb int) {
panic(noComplex)
}
func (Implementation) Chemm(s blas.Side, ul blas.Uplo, m, n int, alpha complex64, a []complex64, lda int, b []complex64, ldb int, beta complex64, c []complex64, ldc int) {
panic(noComplex)
}
func (Implementation) Cherk(ul blas.Uplo, t blas.Transpose, n, k int, alpha float32, a []complex64, lda int, beta float32, c []complex64, ldc int) {
panic(noComplex)
}
func (Implementation) Cher2k(ul blas.Uplo, t blas.Transpose, n, k int, alpha complex64, a []complex64, lda int, b []complex64, ldb int, beta float32, c []complex64, ldc int) {
panic(noComplex)
}
// Level 3 complex128 routines.
func (Implementation) Zgemm(tA, tB blas.Transpose, m, n, k int, alpha complex128, a []complex128, lda int, b []complex128, ldb int, beta complex128, c []complex128, ldc int) {
panic(noComplex)
}
func (Implementation) Zsymm(s blas.Side, ul blas.Uplo, m, n int, alpha complex128, a []complex128, lda int, b []complex128, ldb int, beta complex128, c []complex128, ldc int) {
panic(noComplex)
}
func (Implementation) Zsyrk(ul blas.Uplo, t blas.Transpose, n, k int, alpha complex128, a []complex128, lda int, beta complex128, c []complex128, ldc int) {
panic(noComplex)
}
func (Implementation) Zsyr2k(ul blas.Uplo, t blas.Transpose, n, k int, alpha complex128, a []complex128, lda int, b []complex128, ldb int, beta complex128, c []complex128, ldc int) {
panic(noComplex)
}
func (Implementation) Ztrmm(s blas.Side, ul blas.Uplo, tA blas.Transpose, d blas.Diag, m, n int, alpha complex128, a []complex128, lda int, b []complex128, ldb int) {
panic(noComplex)
}
func (Implementation) Ztrsm(s blas.Side, ul blas.Uplo, tA blas.Transpose, d blas.Diag, m, n int, alpha complex128, a []complex128, lda int, b []complex128, ldb int) {
panic(noComplex)
}
func (Implementation) Zhemm(s blas.Side, ul blas.Uplo, m, n int, alpha complex128, a []complex128, lda int, b []complex128, ldb int, beta complex128, c []complex128, ldc int) {
panic(noComplex)
}
func (Implementation) Zherk(ul blas.Uplo, t blas.Transpose, n, k int, alpha float64, a []complex128, lda int, beta float64, c []complex128, ldc int) {
panic(noComplex)
}
func (Implementation) Zher2k(ul blas.Uplo, t blas.Transpose, n, k int, alpha complex128, a []complex128, lda int, b []complex128, ldb int, beta float64, c []complex128, ldc int) {
panic(noComplex)
}
@@ -0,0 +1,265 @@
// Copyright ©2014 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package gonum
import (
"runtime"
"sync"
"gonum.org/v1/gonum/blas"
"gonum.org/v1/gonum/internal/asm/f64"
)
// Dgemm performs one of the matrix-matrix operations
// C = alpha * A * B + beta * C
// C = alpha * A^T * B + beta * C
// C = alpha * A * B^T + beta * C
// C = alpha * A^T * B^T + beta * C
// where A is an m×k or k×m dense matrix, B is an n×k or k×n dense matrix, C is
// an m×n matrix, and alpha and beta are scalars. tA and tB specify whether A or
// B are transposed.
func (Implementation) Dgemm(tA, tB blas.Transpose, m, n, k int, alpha float64, a []float64, lda int, b []float64, ldb int, beta float64, c []float64, ldc int) {
if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans {
panic(badTranspose)
}
if tB != blas.NoTrans && tB != blas.Trans && tB != blas.ConjTrans {
panic(badTranspose)
}
aTrans := tA == blas.Trans || tA == blas.ConjTrans
if aTrans {
checkDMatrix('a', k, m, a, lda)
} else {
checkDMatrix('a', m, k, a, lda)
}
bTrans := tB == blas.Trans || tB == blas.ConjTrans
if bTrans {
checkDMatrix('b', n, k, b, ldb)
} else {
checkDMatrix('b', k, n, b, ldb)
}
checkDMatrix('c', m, n, c, ldc)
// scale c
if beta != 1 {
if beta == 0 {
for i := 0; i < m; i++ {
ctmp := c[i*ldc : i*ldc+n]
for j := range ctmp {
ctmp[j] = 0
}
}
} else {
for i := 0; i < m; i++ {
ctmp := c[i*ldc : i*ldc+n]
for j := range ctmp {
ctmp[j] *= beta
}
}
}
}
dgemmParallel(aTrans, bTrans, m, n, k, a, lda, b, ldb, c, ldc, alpha)
}
func dgemmParallel(aTrans, bTrans bool, m, n, k int, a []float64, lda int, b []float64, ldb int, c []float64, ldc int, alpha float64) {
// dgemmParallel computes a parallel matrix multiplication by partitioning
// a and b into sub-blocks, and updating c with the multiplication of the sub-block
// In all cases,
// A = [ A_11 A_12 ... A_1j
// A_21 A_22 ... A_2j
// ...
// A_i1 A_i2 ... A_ij]
//
// and same for B. All of the submatrix sizes are blockSize×blockSize except
// at the edges.
//
// In all cases, there is one dimension for each matrix along which
// C must be updated sequentially.
// Cij = \sum_k Aik Bki, (A * B)
// Cij = \sum_k Aki Bkj, (A^T * B)
// Cij = \sum_k Aik Bjk, (A * B^T)
// Cij = \sum_k Aki Bjk, (A^T * B^T)
//
// This code computes one {i, j} block sequentially along the k dimension,
// and computes all of the {i, j} blocks concurrently. This
// partitioning allows Cij to be updated in-place without race-conditions.
// Instead of launching a goroutine for each possible concurrent computation,
// a number of worker goroutines are created and channels are used to pass
// available and completed cases.
//
// http://alexkr.com/docs/matrixmult.pdf is a good reference on matrix-matrix
// multiplies, though this code does not copy matrices to attempt to eliminate
// cache misses.
maxKLen := k
parBlocks := blocks(m, blockSize) * blocks(n, blockSize)
if parBlocks < minParBlock {
// The matrix multiplication is small in the dimensions where it can be
// computed concurrently. Just do it in serial.
dgemmSerial(aTrans, bTrans, m, n, k, a, lda, b, ldb, c, ldc, alpha)
return
}
nWorkers := runtime.GOMAXPROCS(0)
if parBlocks < nWorkers {
nWorkers = parBlocks
}
// There is a tradeoff between the workers having to wait for work
// and a large buffer making operations slow.
buf := buffMul * nWorkers
if buf > parBlocks {
buf = parBlocks
}
sendChan := make(chan subMul, buf)
// Launch workers. A worker receives an {i, j} submatrix of c, and computes
// A_ik B_ki (or the transposed version) storing the result in c_ij. When the
// channel is finally closed, it signals to the waitgroup that it has finished
// computing.
var wg sync.WaitGroup
for i := 0; i < nWorkers; i++ {
wg.Add(1)
go func() {
defer wg.Done()
// Make local copies of otherwise global variables to reduce shared memory.
// This has a noticeable effect on benchmarks in some cases.
alpha := alpha
aTrans := aTrans
bTrans := bTrans
m := m
n := n
for sub := range sendChan {
i := sub.i
j := sub.j
leni := blockSize
if i+leni > m {
leni = m - i
}
lenj := blockSize
if j+lenj > n {
lenj = n - j
}
cSub := sliceView64(c, ldc, i, j, leni, lenj)
// Compute A_ik B_kj for all k
for k := 0; k < maxKLen; k += blockSize {
lenk := blockSize
if k+lenk > maxKLen {
lenk = maxKLen - k
}
var aSub, bSub []float64
if aTrans {
aSub = sliceView64(a, lda, k, i, lenk, leni)
} else {
aSub = sliceView64(a, lda, i, k, leni, lenk)
}
if bTrans {
bSub = sliceView64(b, ldb, j, k, lenj, lenk)
} else {
bSub = sliceView64(b, ldb, k, j, lenk, lenj)
}
dgemmSerial(aTrans, bTrans, leni, lenj, lenk, aSub, lda, bSub, ldb, cSub, ldc, alpha)
}
}
}()
}
// Send out all of the {i, j} subblocks for computation.
for i := 0; i < m; i += blockSize {
for j := 0; j < n; j += blockSize {
sendChan <- subMul{
i: i,
j: j,
}
}
}
close(sendChan)
wg.Wait()
}
// dgemmSerial is serial matrix multiply
func dgemmSerial(aTrans, bTrans bool, m, n, k int, a []float64, lda int, b []float64, ldb int, c []float64, ldc int, alpha float64) {
switch {
case !aTrans && !bTrans:
dgemmSerialNotNot(m, n, k, a, lda, b, ldb, c, ldc, alpha)
return
case aTrans && !bTrans:
dgemmSerialTransNot(m, n, k, a, lda, b, ldb, c, ldc, alpha)
return
case !aTrans && bTrans:
dgemmSerialNotTrans(m, n, k, a, lda, b, ldb, c, ldc, alpha)
return
case aTrans && bTrans:
dgemmSerialTransTrans(m, n, k, a, lda, b, ldb, c, ldc, alpha)
return
default:
panic("unreachable")
}
}
// dgemmSerial where neither a nor b are transposed
func dgemmSerialNotNot(m, n, k int, a []float64, lda int, b []float64, ldb int, c []float64, ldc int, alpha float64) {
// This style is used instead of the literal [i*stride +j]) is used because
// approximately 5 times faster as of go 1.3.
for i := 0; i < m; i++ {
ctmp := c[i*ldc : i*ldc+n]
for l, v := range a[i*lda : i*lda+k] {
tmp := alpha * v
if tmp != 0 {
f64.AxpyUnitaryTo(ctmp, tmp, b[l*ldb:l*ldb+n], ctmp)
}
}
}
}
// dgemmSerial where neither a is transposed and b is not
func dgemmSerialTransNot(m, n, k int, a []float64, lda int, b []float64, ldb int, c []float64, ldc int, alpha float64) {
// This style is used instead of the literal [i*stride +j]) is used because
// approximately 5 times faster as of go 1.3.
for l := 0; l < k; l++ {
btmp := b[l*ldb : l*ldb+n]
for i, v := range a[l*lda : l*lda+m] {
tmp := alpha * v
if tmp != 0 {
ctmp := c[i*ldc : i*ldc+n]
f64.AxpyUnitaryTo(ctmp, tmp, btmp, ctmp)
}
}
}
}
// dgemmSerial where neither a is not transposed and b is
func dgemmSerialNotTrans(m, n, k int, a []float64, lda int, b []float64, ldb int, c []float64, ldc int, alpha float64) {
// This style is used instead of the literal [i*stride +j]) is used because
// approximately 5 times faster as of go 1.3.
for i := 0; i < m; i++ {
atmp := a[i*lda : i*lda+k]
ctmp := c[i*ldc : i*ldc+n]
for j := 0; j < n; j++ {
ctmp[j] += alpha * f64.DotUnitary(atmp, b[j*ldb:j*ldb+k])
}
}
}
// dgemmSerial where both are transposed
func dgemmSerialTransTrans(m, n, k int, a []float64, lda int, b []float64, ldb int, c []float64, ldc int, alpha float64) {
// This style is used instead of the literal [i*stride +j]) is used because
// approximately 5 times faster as of go 1.3.
for l := 0; l < k; l++ {
for i, v := range a[l*lda : l*lda+m] {
tmp := alpha * v
if tmp != 0 {
ctmp := c[i*ldc : i*ldc+n]
f64.AxpyInc(tmp, b[l:], ctmp, uintptr(n), uintptr(ldb), 1, 0, 0)
}
}
}
}
func sliceView64(a []float64, lda, i, j, r, c int) []float64 {
return a[i*lda+j : (i+r-1)*lda+j+c]
}
@@ -0,0 +1,88 @@
// Copyright ©2015 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Ensure changes made to blas/native are reflected in blas/cgo where relevant.
/*
Package gonum is a Go implementation of the BLAS API. This implementation
panics when the input arguments are invalid as per the standard, for example
if a vector increment is zero. Note that the treatment of NaN values
is not specified, and differs among the BLAS implementations.
gonum.org/v1/gonum/blas/blas64 provides helpful wrapper functions to the BLAS
interface. The rest of this text describes the layout of the data for the input types.
Note that in the function documentation, x[i] refers to the i^th element
of the vector, which will be different from the i^th element of the slice if
incX != 1.
See http://www.netlib.org/lapack/explore-html/d4/de1/_l_i_c_e_n_s_e_source.html
for more license information.
Vector arguments are effectively strided slices. They have two input arguments,
a number of elements, n, and an increment, incX. The increment specifies the
distance between elements of the vector. The actual Go slice may be longer
than necessary.
The increment may be positive or negative, except in functions with only
a single vector argument where the increment may only be positive. If the increment
is negative, s[0] is the last element in the slice. Note that this is not the same
as counting backward from the end of the slice, as len(s) may be longer than
necessary. So, for example, if n = 5 and incX = 3, the elements of s are
[0 * * 1 * * 2 * * 3 * * 4 * * * ...]
where elements are never accessed. If incX = -3, the same elements are
accessed, just in reverse order (4, 3, 2, 1, 0).
Dense matrices are specified by a number of rows, a number of columns, and a stride.
The stride specifies the number of entries in the slice between the first element
of successive rows. The stride must be at least as large as the number of columns
but may be longer.
[a00 ... a0n a0* ... a1stride-1 a21 ... amn am* ... amstride-1]
Thus, dense[i*ld + j] refers to the {i, j}th element of the matrix.
Symmetric and triangular matrices (non-packed) are stored identically to Dense,
except that only elements in one triangle of the matrix are accessed.
Packed symmetric and packed triangular matrices are laid out with the entries
condensed such that all of the unreferenced elements are removed. So, the upper triangular
matrix
[
1 2 3
0 4 5
0 0 6
]
and the lower-triangular matrix
[
1 0 0
2 3 0
4 5 6
]
will both be compacted as [1 2 3 4 5 6]. The (i, j) element of the original
dense matrix can be found at element i*n - (i-1)*i/2 + j for upper triangular,
and at element i * (i+1) /2 + j for lower triangular.
Banded matrices are laid out in a compact format, constructed by removing the
zeros in the rows and aligning the diagonals. For example, the matrix
[
1 2 3 0 0 0
4 5 6 7 0 0
0 8 9 10 11 0
0 0 12 13 14 15
0 0 0 16 17 18
0 0 0 0 19 20
]
implicitly becomes ( entries are never accessed)
[
* 1 2 3
4 5 6 7
8 9 10 11
12 13 14 15
16 17 18 *
19 20 * *
]
which is given to the BLAS routine as [ 1 2 3 4 ...].
See http://www.crest.iu.edu/research/mtl/reference/html/banded.html
for more information
*/
package gonum
@@ -0,0 +1,180 @@
// Copyright ©2018 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package gonum
import (
"gonum.org/v1/gonum/blas"
"gonum.org/v1/gonum/internal/asm/f32"
"gonum.org/v1/gonum/internal/asm/f64"
)
// TODO(Kunde21): Merge these methods back into level2double/level2single when Sgemv assembly kernels are merged into f32.
// Dgemv computes
// y = alpha * A * x + beta * y if tA = blas.NoTrans
// y = alpha * A^T * x + beta * y if tA = blas.Trans or blas.ConjTrans
// where A is an m×n dense matrix, x and y are vectors, and alpha and beta are scalars.
func (Implementation) Dgemv(tA blas.Transpose, m, n int, alpha float64, a []float64, lda int, x []float64, incX int, beta float64, y []float64, incY int) {
if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans {
panic(badTranspose)
}
if m < 0 {
panic(mLT0)
}
if n < 0 {
panic(nLT0)
}
if lda < max(1, n) {
panic(badLdA)
}
if incX == 0 {
panic(zeroIncX)
}
if incY == 0 {
panic(zeroIncY)
}
// Set up indexes
lenX := m
lenY := n
if tA == blas.NoTrans {
lenX = n
lenY = m
}
if (incX > 0 && (lenX-1)*incX >= len(x)) || (incX < 0 && (1-lenX)*incX >= len(x)) {
panic(badX)
}
if (incY > 0 && (lenY-1)*incY >= len(y)) || (incY < 0 && (1-lenY)*incY >= len(y)) {
panic(badY)
}
if lda*(m-1)+n > len(a) || lda < max(1, n) {
panic(badLdA)
}
// Quick return if possible
if m == 0 || n == 0 || (alpha == 0 && beta == 1) {
return
}
if alpha == 0 {
// First form y = beta * y
if incY > 0 {
Implementation{}.Dscal(lenY, beta, y, incY)
} else {
Implementation{}.Dscal(lenY, beta, y, -incY)
}
return
}
// Form y = alpha * A * x + y
if tA == blas.NoTrans {
f64.GemvN(uintptr(m), uintptr(n), alpha, a, uintptr(lda), x, uintptr(incX), beta, y, uintptr(incY))
return
}
// Cases where a is transposed.
f64.GemvT(uintptr(m), uintptr(n), alpha, a, uintptr(lda), x, uintptr(incX), beta, y, uintptr(incY))
}
// Sgemv computes
// y = alpha * A * x + beta * y if tA = blas.NoTrans
// y = alpha * A^T * x + beta * y if tA = blas.Trans or blas.ConjTrans
// where A is an m×n dense matrix, x and y are vectors, and alpha and beta are scalars.
//
// Float32 implementations are autogenerated and not directly tested.
func (Implementation) Sgemv(tA blas.Transpose, m, n int, alpha float32, a []float32, lda int, x []float32, incX int, beta float32, y []float32, incY int) {
if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans {
panic(badTranspose)
}
if m < 0 {
panic(mLT0)
}
if n < 0 {
panic(nLT0)
}
if lda < max(1, n) {
panic(badLdA)
}
if incX == 0 {
panic(zeroIncX)
}
if incY == 0 {
panic(zeroIncY)
}
// Set up indexes
lenX := m
lenY := n
if tA == blas.NoTrans {
lenX = n
lenY = m
}
if (incX > 0 && (lenX-1)*incX >= len(x)) || (incX < 0 && (1-lenX)*incX >= len(x)) {
panic(badX)
}
if (incY > 0 && (lenY-1)*incY >= len(y)) || (incY < 0 && (1-lenY)*incY >= len(y)) {
panic(badY)
}
if lda*(m-1)+n > len(a) || lda < max(1, n) {
panic(badLdA)
}
// Quick return if possible
if m == 0 || n == 0 || (alpha == 0 && beta == 1) {
return
}
var kx, ky int
if incX < 0 {
kx = -(lenX - 1) * incX
}
if incY < 0 {
ky = -(lenY - 1) * incY
}
// First form y = beta * y
if incY > 0 {
Implementation{}.Sscal(lenY, beta, y, incY)
} else {
Implementation{}.Sscal(lenY, beta, y, -incY)
}
if alpha == 0 {
return
}
// Form y = alpha * A * x + y
if tA == blas.NoTrans {
if incX == 1 && incY == 1 {
for i := 0; i < m; i++ {
y[i] += alpha * f32.DotUnitary(a[lda*i:lda*i+n], x)
}
return
}
iy := ky
for i := 0; i < m; i++ {
y[iy] += alpha * f32.DotInc(x, a[lda*i:lda*i+n], uintptr(n), uintptr(incX), 1, uintptr(kx), 0)
iy += incY
}
return
}
// Cases where a is transposed.
if incX == 1 && incY == 1 {
for i := 0; i < m; i++ {
tmp := alpha * x[i]
if tmp != 0 {
f32.AxpyUnitaryTo(y, tmp, a[lda*i:lda*i+n], y)
}
}
return
}
ix := kx
for i := 0; i < m; i++ {
tmp := alpha * x[ix]
if tmp != 0 {
f32.AxpyInc(tmp, a[lda*i:lda*i+n], y, uintptr(n), 1, uintptr(incY), 0, uintptr(ky))
}
ix += incX
}
}
@@ -0,0 +1,182 @@
// Copyright ©2015 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:generate ./single_precision.bash
package gonum
import "math"
type Implementation struct{}
// The following are panic strings used during parameter checks.
const (
zeroIncX = "blas: zero x index increment"
zeroIncY = "blas: zero y index increment"
mLT0 = "blas: m < 0"
nLT0 = "blas: n < 0"
kLT0 = "blas: k < 0"
kLLT0 = "blas: kL < 0"
kULT0 = "blas: kU < 0"
badUplo = "blas: illegal triangle"
badTranspose = "blas: illegal transpose"
badDiag = "blas: illegal diagonal"
badSide = "blas: illegal side"
badLdA = "blas: bad leading dimension of A"
badLdB = "blas: bad leading dimension of B"
badLdC = "blas: bad leading dimension of C"
badX = "blas: bad length of x"
badY = "blas: bad length of y"
)
// [SD]gemm behavior constants. These are kept here to keep them out of the
// way during single precision code genration.
const (
blockSize = 64 // b x b matrix
minParBlock = 4 // minimum number of blocks needed to go parallel
buffMul = 4 // how big is the buffer relative to the number of workers
)
// subMul is a common type shared by [SD]gemm.
type subMul struct {
i, j int // index of block
}
func max(a, b int) int {
if a > b {
return a
}
return b
}
func min(a, b int) int {
if a > b {
return b
}
return a
}
func checkSMatrix(name byte, m, n int, a []float32, lda int) {
if m < 0 {
panic(mLT0)
}
if n < 0 {
panic(nLT0)
}
if lda < n {
panic("blas: illegal stride of " + string(name))
}
if len(a) < (m-1)*lda+n {
panic("blas: index of " + string(name) + " out of range")
}
}
func checkDMatrix(name byte, m, n int, a []float64, lda int) {
if m < 0 {
panic(mLT0)
}
if n < 0 {
panic(nLT0)
}
if lda < n {
panic("blas: illegal stride of " + string(name))
}
if len(a) < (m-1)*lda+n {
panic("blas: index of " + string(name) + " out of range")
}
}
func checkZMatrix(name byte, m, n int, a []complex128, lda int) {
if m < 0 {
panic(mLT0)
}
if n < 0 {
panic(nLT0)
}
if lda < max(1, n) {
panic("blas: illegal stride of " + string(name))
}
if len(a) < (m-1)*lda+n {
panic("blas: insufficient " + string(name) + " matrix slice length")
}
}
func checkZBandMatrix(name byte, m, n, kL, kU int, ab []complex128, ldab int) {
if m < 0 {
panic(mLT0)
}
if n < 0 {
panic(nLT0)
}
if kL < 0 {
panic(kLLT0)
}
if kU < 0 {
panic(kULT0)
}
if ldab < kL+kU+1 {
panic("blas: illegal stride of band matrix " + string(name))
}
nRow := min(m, n+kL)
if len(ab) < (nRow-1)*ldab+kL+1+kU {
panic("blas: insufficient " + string(name) + " band matrix slice length")
}
}
func checkZhbMatrix(name byte, n, k int, ab []complex128, ldab int) {
if n < 0 {
panic(nLT0)
}
if k < 0 {
panic(kLT0)
}
if ldab < k+1 {
panic("blas: illegal stride of Hermitian band matrix " + string(name))
}
if len(ab) < (n-1)*ldab+k+1 {
panic("blas: insufficient " + string(name) + " Hermitian band matrix slice length")
}
}
func checkZtbMatrix(name byte, n, k int, ab []complex128, ldab int) {
if n < 0 {
panic(nLT0)
}
if k < 0 {
panic(kLT0)
}
if ldab < k+1 {
panic("blas: illegal stride of triangular band matrix " + string(name))
}
if len(ab) < (n-1)*ldab+k+1 {
panic("blas: insufficient " + string(name) + " triangular band matrix slice length")
}
}
func checkZVector(name byte, n int, x []complex128, incX int) {
if n < 0 {
panic(nLT0)
}
if incX == 0 {
panic(zeroIncX)
}
if (incX > 0 && (n-1)*incX >= len(x)) || (incX < 0 && (1-n)*incX >= len(x)) {
panic("blas: insufficient " + string(name) + " vector slice length")
}
}
// blocks returns the number of divisions of the dimension length with the given
// block size.
func blocks(dim, bsize int) int {
return (dim + bsize - 1) / bsize
}
// dcabs1 returns |real(z)|+|imag(z)|.
func dcabs1(z complex128) float64 {
return math.Abs(real(z)) + math.Abs(imag(z))
}
@@ -0,0 +1,442 @@
// Copyright ©2017 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package gonum
import (
"math"
"gonum.org/v1/gonum/internal/asm/c128"
)
// Dzasum returns the sum of the absolute values of the elements of x
// \sum_i |Re(x[i])| + |Im(x[i])|
// Dzasum returns 0 if incX is negative.
func (Implementation) Dzasum(n int, x []complex128, incX int) float64 {
if n < 0 {
panic(nLT0)
}
if incX < 1 {
if incX == 0 {
panic(zeroIncX)
}
return 0
}
var sum float64
if incX == 1 {
if len(x) < n {
panic(badX)
}
for _, v := range x[:n] {
sum += dcabs1(v)
}
return sum
}
if (n-1)*incX >= len(x) {
panic(badX)
}
for i := 0; i < n; i++ {
v := x[i*incX]
sum += dcabs1(v)
}
return sum
}
// Dznrm2 computes the Euclidean norm of the complex vector x,
// ‖x‖_2 = sqrt(\sum_i x[i] * conj(x[i])).
// This function returns 0 if incX is negative.
func (Implementation) Dznrm2(n int, x []complex128, incX int) float64 {
if incX < 1 {
if incX == 0 {
panic(zeroIncX)
}
return 0
}
if n < 1 {
if n == 0 {
return 0
}
panic(nLT0)
}
if (n-1)*incX >= len(x) {
panic(badX)
}
var (
scale float64
ssq float64 = 1
)
if incX == 1 {
for _, v := range x[:n] {
re, im := math.Abs(real(v)), math.Abs(imag(v))
if re != 0 {
if re > scale {
ssq = 1 + ssq*(scale/re)*(scale/re)
scale = re
} else {
ssq += (re / scale) * (re / scale)
}
}
if im != 0 {
if im > scale {
ssq = 1 + ssq*(scale/im)*(scale/im)
scale = im
} else {
ssq += (im / scale) * (im / scale)
}
}
}
if math.IsInf(scale, 1) {
return math.Inf(1)
}
return scale * math.Sqrt(ssq)
}
for ix := 0; ix < n*incX; ix += incX {
re, im := math.Abs(real(x[ix])), math.Abs(imag(x[ix]))
if re != 0 {
if re > scale {
ssq = 1 + ssq*(scale/re)*(scale/re)
scale = re
} else {
ssq += (re / scale) * (re / scale)
}
}
if im != 0 {
if im > scale {
ssq = 1 + ssq*(scale/im)*(scale/im)
scale = im
} else {
ssq += (im / scale) * (im / scale)
}
}
}
if math.IsInf(scale, 1) {
return math.Inf(1)
}
return scale * math.Sqrt(ssq)
}
// Izamax returns the index of the first element of x having largest |Re(·)|+|Im(·)|.
// Izamax returns -1 if n is 0 or incX is negative.
func (Implementation) Izamax(n int, x []complex128, incX int) int {
if incX < 1 {
if incX == 0 {
panic(zeroIncX)
}
// Return invalid index.
return -1
}
if n < 1 {
if n == 0 {
// Return invalid index.
return -1
}
panic(nLT0)
}
if len(x) <= (n-1)*incX {
panic(badX)
}
idx := 0
max := dcabs1(x[0])
if incX == 1 {
for i, v := range x[1:n] {
absV := dcabs1(v)
if absV > max {
max = absV
idx = i + 1
}
}
return idx
}
ix := incX
for i := 1; i < n; i++ {
absV := dcabs1(x[ix])
if absV > max {
max = absV
idx = i
}
ix += incX
}
return idx
}
// Zaxpy adds alpha times x to y:
// y[i] += alpha * x[i] for all i
func (Implementation) Zaxpy(n int, alpha complex128, x []complex128, incX int, y []complex128, incY int) {
if incX == 0 {
panic(zeroIncX)
}
if incY == 0 {
panic(zeroIncY)
}
if n < 1 {
if n == 0 {
return
}
panic(nLT0)
}
if (incX > 0 && (n-1)*incX >= len(x)) || (incX < 0 && (1-n)*incX >= len(x)) {
panic(badX)
}
if (incY > 0 && (n-1)*incY >= len(y)) || (incY < 0 && (1-n)*incY >= len(y)) {
panic(badY)
}
if alpha == 0 {
return
}
if incX == 1 && incY == 1 {
c128.AxpyUnitary(alpha, x[:n], y[:n])
return
}
var ix, iy int
if incX < 0 {
ix = (1 - n) * incX
}
if incY < 0 {
iy = (1 - n) * incY
}
c128.AxpyInc(alpha, x, y, uintptr(n), uintptr(incX), uintptr(incY), uintptr(ix), uintptr(iy))
}
// Zcopy copies the vector x to vector y.
func (Implementation) Zcopy(n int, x []complex128, incX int, y []complex128, incY int) {
if incX == 0 {
panic(zeroIncX)
}
if incY == 0 {
panic(zeroIncY)
}
if n < 1 {
if n == 0 {
return
}
panic(nLT0)
}
if (incX > 0 && (n-1)*incX >= len(x)) || (incX < 0 && (1-n)*incX >= len(x)) {
panic(badX)
}
if (incY > 0 && (n-1)*incY >= len(y)) || (incY < 0 && (1-n)*incY >= len(y)) {
panic(badY)
}
if incX == 1 && incY == 1 {
copy(y[:n], x[:n])
return
}
var ix, iy int
if incX < 0 {
ix = (-n + 1) * incX
}
if incY < 0 {
iy = (-n + 1) * incY
}
for i := 0; i < n; i++ {
y[iy] = x[ix]
ix += incX
iy += incY
}
}
// Zdotc computes the dot product
// x^H · y
// of two complex vectors x and y.
func (Implementation) Zdotc(n int, x []complex128, incX int, y []complex128, incY int) complex128 {
if incX == 0 {
panic(zeroIncX)
}
if incY == 0 {
panic(zeroIncY)
}
if n <= 0 {
if n == 0 {
return 0
}
panic(nLT0)
}
if incX == 1 && incY == 1 {
if len(x) < n {
panic(badX)
}
if len(y) < n {
panic(badY)
}
return c128.DotcUnitary(x[:n], y[:n])
}
var ix, iy int
if incX < 0 {
ix = (-n + 1) * incX
}
if incY < 0 {
iy = (-n + 1) * incY
}
if ix >= len(x) || (n-1)*incX >= len(x) {
panic(badX)
}
if iy >= len(y) || (n-1)*incY >= len(y) {
panic(badY)
}
return c128.DotcInc(x, y, uintptr(n), uintptr(incX), uintptr(incY), uintptr(ix), uintptr(iy))
}
// Zdotu computes the dot product
// x^T · y
// of two complex vectors x and y.
func (Implementation) Zdotu(n int, x []complex128, incX int, y []complex128, incY int) complex128 {
if incX == 0 {
panic(zeroIncX)
}
if incY == 0 {
panic(zeroIncY)
}
if n <= 0 {
if n == 0 {
return 0
}
panic(nLT0)
}
if incX == 1 && incY == 1 {
if len(x) < n {
panic(badX)
}
if len(y) < n {
panic(badY)
}
return c128.DotuUnitary(x[:n], y[:n])
}
var ix, iy int
if incX < 0 {
ix = (-n + 1) * incX
}
if incY < 0 {
iy = (-n + 1) * incY
}
if ix >= len(x) || (n-1)*incX >= len(x) {
panic(badX)
}
if iy >= len(y) || (n-1)*incY >= len(y) {
panic(badY)
}
return c128.DotuInc(x, y, uintptr(n), uintptr(incX), uintptr(incY), uintptr(ix), uintptr(iy))
}
// Zdscal scales the vector x by a real scalar alpha.
// Zdscal has no effect if incX < 0.
func (Implementation) Zdscal(n int, alpha float64, x []complex128, incX int) {
if incX < 1 {
if incX == 0 {
panic(zeroIncX)
}
return
}
if (n-1)*incX >= len(x) {
panic(badX)
}
if n < 1 {
if n == 0 {
return
}
panic(nLT0)
}
if alpha == 0 {
if incX == 1 {
x = x[:n]
for i := range x {
x[i] = 0
}
return
}
for ix := 0; ix < n*incX; ix += incX {
x[ix] = 0
}
return
}
if incX == 1 {
x = x[:n]
for i, v := range x {
x[i] = complex(alpha*real(v), alpha*imag(v))
}
return
}
for ix := 0; ix < n*incX; ix += incX {
v := x[ix]
x[ix] = complex(alpha*real(v), alpha*imag(v))
}
}
// Zscal scales the vector x by a complex scalar alpha.
// Zscal has no effect if incX < 0.
func (Implementation) Zscal(n int, alpha complex128, x []complex128, incX int) {
if incX < 1 {
if incX == 0 {
panic(zeroIncX)
}
return
}
if (n-1)*incX >= len(x) {
panic(badX)
}
if n < 1 {
if n == 0 {
return
}
panic(nLT0)
}
if alpha == 0 {
if incX == 1 {
x = x[:n]
for i := range x {
x[i] = 0
}
return
}
for ix := 0; ix < n*incX; ix += incX {
x[ix] = 0
}
return
}
if incX == 1 {
c128.ScalUnitary(alpha, x[:n])
return
}
c128.ScalInc(alpha, x, uintptr(n), uintptr(incX))
}
// Zswap exchanges the elements of two complex vectors x and y.
func (Implementation) Zswap(n int, x []complex128, incX int, y []complex128, incY int) {
if incX == 0 {
panic(zeroIncX)
}
if incY == 0 {
panic(zeroIncY)
}
if n < 1 {
if n == 0 {
return
}
panic(nLT0)
}
if (incX > 0 && (n-1)*incX >= len(x)) || (incX < 0 && (1-n)*incX >= len(x)) {
panic(badX)
}
if (incY > 0 && (n-1)*incY >= len(y)) || (incY < 0 && (1-n)*incY >= len(y)) {
panic(badY)
}
if incX == 1 && incY == 1 {
x = x[:n]
for i, v := range x {
x[i], y[i] = y[i], v
}
return
}
var ix, iy int
if incX < 0 {
ix = (-n + 1) * incX
}
if incY < 0 {
iy = (-n + 1) * incY
}
for i := 0; i < n; i++ {
x[ix], y[iy] = y[iy], x[ix]
ix += incX
iy += incY
}
}
@@ -0,0 +1,620 @@
// Copyright ©2015 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package gonum
import (
"math"
"gonum.org/v1/gonum/blas"
"gonum.org/v1/gonum/internal/asm/f64"
)
var _ blas.Float64Level1 = Implementation{}
// Dnrm2 computes the Euclidean norm of a vector,
// sqrt(\sum_i x[i] * x[i]).
// This function returns 0 if incX is negative.
func (Implementation) Dnrm2(n int, x []float64, incX int) float64 {
if incX < 1 {
if incX == 0 {
panic(zeroIncX)
}
return 0
}
if incX > 0 && (n-1)*incX >= len(x) {
panic(badX)
}
if n < 2 {
if n == 1 {
return math.Abs(x[0])
}
if n == 0 {
return 0
}
panic(nLT0)
}
var (
scale float64 = 0
sumSquares float64 = 1
)
if incX == 1 {
x = x[:n]
for _, v := range x {
if v == 0 {
continue
}
absxi := math.Abs(v)
if math.IsNaN(absxi) {
return math.NaN()
}
if scale < absxi {
sumSquares = 1 + sumSquares*(scale/absxi)*(scale/absxi)
scale = absxi
} else {
sumSquares = sumSquares + (absxi/scale)*(absxi/scale)
}
}
if math.IsInf(scale, 1) {
return math.Inf(1)
}
return scale * math.Sqrt(sumSquares)
}
for ix := 0; ix < n*incX; ix += incX {
val := x[ix]
if val == 0 {
continue
}
absxi := math.Abs(val)
if math.IsNaN(absxi) {
return math.NaN()
}
if scale < absxi {
sumSquares = 1 + sumSquares*(scale/absxi)*(scale/absxi)
scale = absxi
} else {
sumSquares = sumSquares + (absxi/scale)*(absxi/scale)
}
}
if math.IsInf(scale, 1) {
return math.Inf(1)
}
return scale * math.Sqrt(sumSquares)
}
// Dasum computes the sum of the absolute values of the elements of x.
// \sum_i |x[i]|
// Dasum returns 0 if incX is negative.
func (Implementation) Dasum(n int, x []float64, incX int) float64 {
var sum float64
if n < 0 {
panic(nLT0)
}
if incX < 1 {
if incX == 0 {
panic(zeroIncX)
}
return 0
}
if incX > 0 && (n-1)*incX >= len(x) {
panic(badX)
}
if incX == 1 {
x = x[:n]
for _, v := range x {
sum += math.Abs(v)
}
return sum
}
for i := 0; i < n; i++ {
sum += math.Abs(x[i*incX])
}
return sum
}
// Idamax returns the index of an element of x with the largest absolute value.
// If there are multiple such indices the earliest is returned.
// Idamax returns -1 if n == 0.
func (Implementation) Idamax(n int, x []float64, incX int) int {
if incX < 1 {
if incX == 0 {
panic(zeroIncX)
}
return -1
}
if incX > 0 && (n-1)*incX >= len(x) {
panic(badX)
}
if n < 2 {
if n == 1 {
return 0
}
if n == 0 {
return -1 // Netlib returns invalid index when n == 0
}
panic(nLT0)
}
idx := 0
max := math.Abs(x[0])
if incX == 1 {
for i, v := range x[:n] {
absV := math.Abs(v)
if absV > max {
max = absV
idx = i
}
}
return idx
}
ix := incX
for i := 1; i < n; i++ {
v := x[ix]
absV := math.Abs(v)
if absV > max {
max = absV
idx = i
}
ix += incX
}
return idx
}
// Dswap exchanges the elements of two vectors.
// x[i], y[i] = y[i], x[i] for all i
func (Implementation) Dswap(n int, x []float64, incX int, y []float64, incY int) {
if incX == 0 {
panic(zeroIncX)
}
if incY == 0 {
panic(zeroIncY)
}
if n < 1 {
if n == 0 {
return
}
panic(nLT0)
}
if (incX > 0 && (n-1)*incX >= len(x)) || (incX < 0 && (1-n)*incX >= len(x)) {
panic(badX)
}
if (incY > 0 && (n-1)*incY >= len(y)) || (incY < 0 && (1-n)*incY >= len(y)) {
panic(badY)
}
if incX == 1 && incY == 1 {
x = x[:n]
for i, v := range x {
x[i], y[i] = y[i], v
}
return
}
var ix, iy int
if incX < 0 {
ix = (-n + 1) * incX
}
if incY < 0 {
iy = (-n + 1) * incY
}
for i := 0; i < n; i++ {
x[ix], y[iy] = y[iy], x[ix]
ix += incX
iy += incY
}
}
// Dcopy copies the elements of x into the elements of y.
// y[i] = x[i] for all i
func (Implementation) Dcopy(n int, x []float64, incX int, y []float64, incY int) {
if incX == 0 {
panic(zeroIncX)
}
if incY == 0 {
panic(zeroIncY)
}
if n < 1 {
if n == 0 {
return
}
panic(nLT0)
}
if (incX > 0 && (n-1)*incX >= len(x)) || (incX < 0 && (1-n)*incX >= len(x)) {
panic(badX)
}
if (incY > 0 && (n-1)*incY >= len(y)) || (incY < 0 && (1-n)*incY >= len(y)) {
panic(badY)
}
if incX == 1 && incY == 1 {
copy(y[:n], x[:n])
return
}
var ix, iy int
if incX < 0 {
ix = (-n + 1) * incX
}
if incY < 0 {
iy = (-n + 1) * incY
}
for i := 0; i < n; i++ {
y[iy] = x[ix]
ix += incX
iy += incY
}
}
// Daxpy adds alpha times x to y
// y[i] += alpha * x[i] for all i
func (Implementation) Daxpy(n int, alpha float64, x []float64, incX int, y []float64, incY int) {
if incX == 0 {
panic(zeroIncX)
}
if incY == 0 {
panic(zeroIncY)
}
if n < 1 {
if n == 0 {
return
}
panic(nLT0)
}
if (incX > 0 && (n-1)*incX >= len(x)) || (incX < 0 && (1-n)*incX >= len(x)) {
panic(badX)
}
if (incY > 0 && (n-1)*incY >= len(y)) || (incY < 0 && (1-n)*incY >= len(y)) {
panic(badY)
}
if alpha == 0 {
return
}
if incX == 1 && incY == 1 {
f64.AxpyUnitary(alpha, x[:n], y[:n])
return
}
var ix, iy int
if incX < 0 {
ix = (-n + 1) * incX
}
if incY < 0 {
iy = (-n + 1) * incY
}
f64.AxpyInc(alpha, x, y, uintptr(n), uintptr(incX), uintptr(incY), uintptr(ix), uintptr(iy))
}
// Drotg computes the plane rotation
// _ _ _ _ _ _
// | c s | | a | | r |
// | -s c | * | b | = | 0 |
// ‾ ‾ ‾ ‾ ‾ ‾
// where
// r = ±√(a^2 + b^2)
// c = a/r, the cosine of the plane rotation
// s = b/r, the sine of the plane rotation
//
// NOTE: There is a discrepancy between the refence implementation and the BLAS
// technical manual regarding the sign for r when a or b are zero.
// Drotg agrees with the definition in the manual and other
// common BLAS implementations.
func (Implementation) Drotg(a, b float64) (c, s, r, z float64) {
if b == 0 && a == 0 {
return 1, 0, a, 0
}
absA := math.Abs(a)
absB := math.Abs(b)
aGTb := absA > absB
r = math.Hypot(a, b)
if aGTb {
r = math.Copysign(r, a)
} else {
r = math.Copysign(r, b)
}
c = a / r
s = b / r
if aGTb {
z = s
} else if c != 0 { // r == 0 case handled above
z = 1 / c
} else {
z = 1
}
return
}
// Drotmg computes the modified Givens rotation. See
// http://www.netlib.org/lapack/explore-html/df/deb/drotmg_8f.html
// for more details.
func (Implementation) Drotmg(d1, d2, x1, y1 float64) (p blas.DrotmParams, rd1, rd2, rx1 float64) {
// The implementation of Drotmg used here is taken from Hopkins 1997
// Appendix A: https://doi.org/10.1145/289251.289253
// with the exception of the gam constants below.
const (
gam = 4096.0
gamsq = gam * gam
rgamsq = 1.0 / gamsq
)
if d1 < 0 {
p.Flag = blas.Rescaling // Error state.
return p, 0, 0, 0
}
if d2 == 0 || y1 == 0 {
p.Flag = blas.Identity
return p, d1, d2, x1
}
var h11, h12, h21, h22 float64
if (d1 == 0 || x1 == 0) && d2 > 0 {
p.Flag = blas.Diagonal
h12 = 1
h21 = -1
x1 = y1
d1, d2 = d2, d1
} else {
p2 := d2 * y1
p1 := d1 * x1
q2 := p2 * y1
q1 := p1 * x1
if math.Abs(q1) > math.Abs(q2) {
p.Flag = blas.OffDiagonal
h11 = 1
h22 = 1
h21 = -y1 / x1
h12 = p2 / p1
u := 1 - h12*h21
if u <= 0 {
p.Flag = blas.Rescaling // Error state.
return p, 0, 0, 0
}
d1 /= u
d2 /= u
x1 *= u
} else {
if q2 < 0 {
p.Flag = blas.Rescaling // Error state.
return p, 0, 0, 0
}
p.Flag = blas.Diagonal
h21 = -1
h12 = 1
h11 = p1 / p2
h22 = x1 / y1
u := 1 + h11*h22
d1, d2 = d2/u, d1/u
x1 = y1 * u
}
}
for d1 <= rgamsq && d1 != 0 {
p.Flag = blas.Rescaling
d1 = (d1 * gam) * gam
x1 /= gam
h11 /= gam
h12 /= gam
}
for d1 > gamsq {
p.Flag = blas.Rescaling
d1 = (d1 / gam) / gam
x1 *= gam
h11 *= gam
h12 *= gam
}
for math.Abs(d2) <= rgamsq && d2 != 0 {
p.Flag = blas.Rescaling
d2 = (d2 * gam) * gam
h21 /= gam
h22 /= gam
}
for math.Abs(d2) > gamsq {
p.Flag = blas.Rescaling
d2 = (d2 / gam) / gam
h21 *= gam
h22 *= gam
}
switch p.Flag {
case blas.Diagonal:
p.H = [4]float64{0: h11, 3: h22}
case blas.OffDiagonal:
p.H = [4]float64{1: h21, 2: h12}
case blas.Rescaling:
p.H = [4]float64{h11, h21, h12, h22}
default:
panic("blas: unexpected blas.Flag")
}
return p, d1, d2, x1
}
// Drot applies a plane transformation.
// x[i] = c * x[i] + s * y[i]
// y[i] = c * y[i] - s * x[i]
func (Implementation) Drot(n int, x []float64, incX int, y []float64, incY int, c float64, s float64) {
if incX == 0 {
panic(zeroIncX)
}
if incY == 0 {
panic(zeroIncY)
}
if n < 1 {
if n == 0 {
return
}
panic(nLT0)
}
if (incX > 0 && (n-1)*incX >= len(x)) || (incX < 0 && (1-n)*incX >= len(x)) {
panic(badX)
}
if (incY > 0 && (n-1)*incY >= len(y)) || (incY < 0 && (1-n)*incY >= len(y)) {
panic(badY)
}
if incX == 1 && incY == 1 {
x = x[:n]
for i, vx := range x {
vy := y[i]
x[i], y[i] = c*vx+s*vy, c*vy-s*vx
}
return
}
var ix, iy int
if incX < 0 {
ix = (-n + 1) * incX
}
if incY < 0 {
iy = (-n + 1) * incY
}
for i := 0; i < n; i++ {
vx := x[ix]
vy := y[iy]
x[ix], y[iy] = c*vx+s*vy, c*vy-s*vx
ix += incX
iy += incY
}
}
// Drotm applies the modified Givens rotation to the 2×n matrix.
func (Implementation) Drotm(n int, x []float64, incX int, y []float64, incY int, p blas.DrotmParams) {
if incX == 0 {
panic(zeroIncX)
}
if incY == 0 {
panic(zeroIncY)
}
if n <= 0 {
if n == 0 {
return
}
panic(nLT0)
}
if (incX > 0 && (n-1)*incX >= len(x)) || (incX < 0 && (1-n)*incX >= len(x)) {
panic(badX)
}
if (incY > 0 && (n-1)*incY >= len(y)) || (incY < 0 && (1-n)*incY >= len(y)) {
panic(badY)
}
if p.Flag == blas.Identity {
return
}
switch p.Flag {
case blas.Rescaling:
h11 := p.H[0]
h12 := p.H[2]
h21 := p.H[1]
h22 := p.H[3]
if incX == 1 && incY == 1 {
x = x[:n]
for i, vx := range x {
vy := y[i]
x[i], y[i] = vx*h11+vy*h12, vx*h21+vy*h22
}
return
}
var ix, iy int
if incX < 0 {
ix = (-n + 1) * incX
}
if incY < 0 {
iy = (-n + 1) * incY
}
for i := 0; i < n; i++ {
vx := x[ix]
vy := y[iy]
x[ix], y[iy] = vx*h11+vy*h12, vx*h21+vy*h22
ix += incX
iy += incY
}
case blas.OffDiagonal:
h12 := p.H[2]
h21 := p.H[1]
if incX == 1 && incY == 1 {
x = x[:n]
for i, vx := range x {
vy := y[i]
x[i], y[i] = vx+vy*h12, vx*h21+vy
}
return
}
var ix, iy int
if incX < 0 {
ix = (-n + 1) * incX
}
if incY < 0 {
iy = (-n + 1) * incY
}
for i := 0; i < n; i++ {
vx := x[ix]
vy := y[iy]
x[ix], y[iy] = vx+vy*h12, vx*h21+vy
ix += incX
iy += incY
}
case blas.Diagonal:
h11 := p.H[0]
h22 := p.H[3]
if incX == 1 && incY == 1 {
x = x[:n]
for i, vx := range x {
vy := y[i]
x[i], y[i] = vx*h11+vy, -vx+vy*h22
}
return
}
var ix, iy int
if incX < 0 {
ix = (-n + 1) * incX
}
if incY < 0 {
iy = (-n + 1) * incY
}
for i := 0; i < n; i++ {
vx := x[ix]
vy := y[iy]
x[ix], y[iy] = vx*h11+vy, -vx+vy*h22
ix += incX
iy += incY
}
}
}
// Dscal scales x by alpha.
// x[i] *= alpha
// Dscal has no effect if incX < 0.
func (Implementation) Dscal(n int, alpha float64, x []float64, incX int) {
if incX < 1 {
if incX == 0 {
panic(zeroIncX)
}
return
}
if (n-1)*incX >= len(x) {
panic(badX)
}
if n < 1 {
if n == 0 {
return
}
panic(nLT0)
}
if alpha == 0 {
if incX == 1 {
x = x[:n]
for i := range x {
x[i] = 0
}
return
}
for ix := 0; ix < n*incX; ix += incX {
x[ix] = 0
}
return
}
if incX == 1 {
f64.ScalUnitary(alpha, x[:n])
return
}
f64.ScalInc(alpha, x, uintptr(n), uintptr(incX))
}
@@ -0,0 +1,49 @@
// Copyright ©2015 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package gonum
import (
"gonum.org/v1/gonum/internal/asm/f64"
)
// Ddot computes the dot product of the two vectors
// \sum_i x[i]*y[i]
func (Implementation) Ddot(n int, x []float64, incX int, y []float64, incY int) float64 {
if incX == 0 {
panic(zeroIncX)
}
if incY == 0 {
panic(zeroIncY)
}
if n <= 0 {
if n == 0 {
return 0
}
panic(nLT0)
}
if incX == 1 && incY == 1 {
if len(x) < n {
panic(badX)
}
if len(y) < n {
panic(badY)
}
return f64.DotUnitary(x[:n], y)
}
var ix, iy int
if incX < 0 {
ix = (-n + 1) * incX
}
if incY < 0 {
iy = (-n + 1) * incY
}
if ix >= len(x) || ix+(n-1)*incX >= len(x) {
panic(badX)
}
if iy >= len(y) || iy+(n-1)*incY >= len(y) {
panic(badY)
}
return f64.DotInc(x, y, uintptr(n), uintptr(incX), uintptr(incY), uintptr(ix), uintptr(iy))
}
@@ -0,0 +1,644 @@
// Code generated by "go generate gonum.org/v1/gonum/blas/gonum”; DO NOT EDIT.
// Copyright ©2015 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package gonum
import (
math "gonum.org/v1/gonum/internal/math32"
"gonum.org/v1/gonum/blas"
"gonum.org/v1/gonum/internal/asm/f32"
)
var _ blas.Float32Level1 = Implementation{}
// Snrm2 computes the Euclidean norm of a vector,
// sqrt(\sum_i x[i] * x[i]).
// This function returns 0 if incX is negative.
//
// Float32 implementations are autogenerated and not directly tested.
func (Implementation) Snrm2(n int, x []float32, incX int) float32 {
if incX < 1 {
if incX == 0 {
panic(zeroIncX)
}
return 0
}
if incX > 0 && (n-1)*incX >= len(x) {
panic(badX)
}
if n < 2 {
if n == 1 {
return math.Abs(x[0])
}
if n == 0 {
return 0
}
panic(nLT0)
}
var (
scale float32 = 0
sumSquares float32 = 1
)
if incX == 1 {
x = x[:n]
for _, v := range x {
if v == 0 {
continue
}
absxi := math.Abs(v)
if math.IsNaN(absxi) {
return math.NaN()
}
if scale < absxi {
sumSquares = 1 + sumSquares*(scale/absxi)*(scale/absxi)
scale = absxi
} else {
sumSquares = sumSquares + (absxi/scale)*(absxi/scale)
}
}
if math.IsInf(scale, 1) {
return math.Inf(1)
}
return scale * math.Sqrt(sumSquares)
}
for ix := 0; ix < n*incX; ix += incX {
val := x[ix]
if val == 0 {
continue
}
absxi := math.Abs(val)
if math.IsNaN(absxi) {
return math.NaN()
}
if scale < absxi {
sumSquares = 1 + sumSquares*(scale/absxi)*(scale/absxi)
scale = absxi
} else {
sumSquares = sumSquares + (absxi/scale)*(absxi/scale)
}
}
if math.IsInf(scale, 1) {
return math.Inf(1)
}
return scale * math.Sqrt(sumSquares)
}
// Sasum computes the sum of the absolute values of the elements of x.
// \sum_i |x[i]|
// Sasum returns 0 if incX is negative.
//
// Float32 implementations are autogenerated and not directly tested.
func (Implementation) Sasum(n int, x []float32, incX int) float32 {
var sum float32
if n < 0 {
panic(nLT0)
}
if incX < 1 {
if incX == 0 {
panic(zeroIncX)
}
return 0
}
if incX > 0 && (n-1)*incX >= len(x) {
panic(badX)
}
if incX == 1 {
x = x[:n]
for _, v := range x {
sum += math.Abs(v)
}
return sum
}
for i := 0; i < n; i++ {
sum += math.Abs(x[i*incX])
}
return sum
}
// Isamax returns the index of an element of x with the largest absolute value.
// If there are multiple such indices the earliest is returned.
// Isamax returns -1 if n == 0.
//
// Float32 implementations are autogenerated and not directly tested.
func (Implementation) Isamax(n int, x []float32, incX int) int {
if incX < 1 {
if incX == 0 {
panic(zeroIncX)
}
return -1
}
if incX > 0 && (n-1)*incX >= len(x) {
panic(badX)
}
if n < 2 {
if n == 1 {
return 0
}
if n == 0 {
return -1 // Netlib returns invalid index when n == 0
}
panic(nLT0)
}
idx := 0
max := math.Abs(x[0])
if incX == 1 {
for i, v := range x[:n] {
absV := math.Abs(v)
if absV > max {
max = absV
idx = i
}
}
return idx
}
ix := incX
for i := 1; i < n; i++ {
v := x[ix]
absV := math.Abs(v)
if absV > max {
max = absV
idx = i
}
ix += incX
}
return idx
}
// Sswap exchanges the elements of two vectors.
// x[i], y[i] = y[i], x[i] for all i
//
// Float32 implementations are autogenerated and not directly tested.
func (Implementation) Sswap(n int, x []float32, incX int, y []float32, incY int) {
if incX == 0 {
panic(zeroIncX)
}
if incY == 0 {
panic(zeroIncY)
}
if n < 1 {
if n == 0 {
return
}
panic(nLT0)
}
if (incX > 0 && (n-1)*incX >= len(x)) || (incX < 0 && (1-n)*incX >= len(x)) {
panic(badX)
}
if (incY > 0 && (n-1)*incY >= len(y)) || (incY < 0 && (1-n)*incY >= len(y)) {
panic(badY)
}
if incX == 1 && incY == 1 {
x = x[:n]
for i, v := range x {
x[i], y[i] = y[i], v
}
return
}
var ix, iy int
if incX < 0 {
ix = (-n + 1) * incX
}
if incY < 0 {
iy = (-n + 1) * incY
}
for i := 0; i < n; i++ {
x[ix], y[iy] = y[iy], x[ix]
ix += incX
iy += incY
}
}
// Scopy copies the elements of x into the elements of y.
// y[i] = x[i] for all i
//
// Float32 implementations are autogenerated and not directly tested.
func (Implementation) Scopy(n int, x []float32, incX int, y []float32, incY int) {
if incX == 0 {
panic(zeroIncX)
}
if incY == 0 {
panic(zeroIncY)
}
if n < 1 {
if n == 0 {
return
}
panic(nLT0)
}
if (incX > 0 && (n-1)*incX >= len(x)) || (incX < 0 && (1-n)*incX >= len(x)) {
panic(badX)
}
if (incY > 0 && (n-1)*incY >= len(y)) || (incY < 0 && (1-n)*incY >= len(y)) {
panic(badY)
}
if incX == 1 && incY == 1 {
copy(y[:n], x[:n])
return
}
var ix, iy int
if incX < 0 {
ix = (-n + 1) * incX
}
if incY < 0 {
iy = (-n + 1) * incY
}
for i := 0; i < n; i++ {
y[iy] = x[ix]
ix += incX
iy += incY
}
}
// Saxpy adds alpha times x to y
// y[i] += alpha * x[i] for all i
//
// Float32 implementations are autogenerated and not directly tested.
func (Implementation) Saxpy(n int, alpha float32, x []float32, incX int, y []float32, incY int) {
if incX == 0 {
panic(zeroIncX)
}
if incY == 0 {
panic(zeroIncY)
}
if n < 1 {
if n == 0 {
return
}
panic(nLT0)
}
if (incX > 0 && (n-1)*incX >= len(x)) || (incX < 0 && (1-n)*incX >= len(x)) {
panic(badX)
}
if (incY > 0 && (n-1)*incY >= len(y)) || (incY < 0 && (1-n)*incY >= len(y)) {
panic(badY)
}
if alpha == 0 {
return
}
if incX == 1 && incY == 1 {
f32.AxpyUnitary(alpha, x[:n], y[:n])
return
}
var ix, iy int
if incX < 0 {
ix = (-n + 1) * incX
}
if incY < 0 {
iy = (-n + 1) * incY
}
f32.AxpyInc(alpha, x, y, uintptr(n), uintptr(incX), uintptr(incY), uintptr(ix), uintptr(iy))
}
// Srotg computes the plane rotation
// _ _ _ _ _ _
// | c s | | a | | r |
// | -s c | * | b | = | 0 |
// ‾ ‾ ‾ ‾ ‾ ‾
// where
// r = ±√(a^2 + b^2)
// c = a/r, the cosine of the plane rotation
// s = b/r, the sine of the plane rotation
//
// NOTE: There is a discrepancy between the refence implementation and the BLAS
// technical manual regarding the sign for r when a or b are zero.
// Srotg agrees with the definition in the manual and other
// common BLAS implementations.
//
// Float32 implementations are autogenerated and not directly tested.
func (Implementation) Srotg(a, b float32) (c, s, r, z float32) {
if b == 0 && a == 0 {
return 1, 0, a, 0
}
absA := math.Abs(a)
absB := math.Abs(b)
aGTb := absA > absB
r = math.Hypot(a, b)
if aGTb {
r = math.Copysign(r, a)
} else {
r = math.Copysign(r, b)
}
c = a / r
s = b / r
if aGTb {
z = s
} else if c != 0 { // r == 0 case handled above
z = 1 / c
} else {
z = 1
}
return
}
// Srotmg computes the modified Givens rotation. See
// http://www.netlib.org/lapack/explore-html/df/deb/drotmg_8f.html
// for more details.
//
// Float32 implementations are autogenerated and not directly tested.
func (Implementation) Srotmg(d1, d2, x1, y1 float32) (p blas.SrotmParams, rd1, rd2, rx1 float32) {
// The implementation of Drotmg used here is taken from Hopkins 1997
// Appendix A: https://doi.org/10.1145/289251.289253
// with the exception of the gam constants below.
const (
gam = 4096.0
gamsq = gam * gam
rgamsq = 1.0 / gamsq
)
if d1 < 0 {
p.Flag = blas.Rescaling // Error state.
return p, 0, 0, 0
}
if d2 == 0 || y1 == 0 {
p.Flag = blas.Identity
return p, d1, d2, x1
}
var h11, h12, h21, h22 float32
if (d1 == 0 || x1 == 0) && d2 > 0 {
p.Flag = blas.Diagonal
h12 = 1
h21 = -1
x1 = y1
d1, d2 = d2, d1
} else {
p2 := d2 * y1
p1 := d1 * x1
q2 := p2 * y1
q1 := p1 * x1
if math.Abs(q1) > math.Abs(q2) {
p.Flag = blas.OffDiagonal
h11 = 1
h22 = 1
h21 = -y1 / x1
h12 = p2 / p1
u := 1 - h12*h21
if u <= 0 {
p.Flag = blas.Rescaling // Error state.
return p, 0, 0, 0
}
d1 /= u
d2 /= u
x1 *= u
} else {
if q2 < 0 {
p.Flag = blas.Rescaling // Error state.
return p, 0, 0, 0
}
p.Flag = blas.Diagonal
h21 = -1
h12 = 1
h11 = p1 / p2
h22 = x1 / y1
u := 1 + h11*h22
d1, d2 = d2/u, d1/u
x1 = y1 * u
}
}
for d1 <= rgamsq && d1 != 0 {
p.Flag = blas.Rescaling
d1 = (d1 * gam) * gam
x1 /= gam
h11 /= gam
h12 /= gam
}
for d1 > gamsq {
p.Flag = blas.Rescaling
d1 = (d1 / gam) / gam
x1 *= gam
h11 *= gam
h12 *= gam
}
for math.Abs(d2) <= rgamsq && d2 != 0 {
p.Flag = blas.Rescaling
d2 = (d2 * gam) * gam
h21 /= gam
h22 /= gam
}
for math.Abs(d2) > gamsq {
p.Flag = blas.Rescaling
d2 = (d2 / gam) / gam
h21 *= gam
h22 *= gam
}
switch p.Flag {
case blas.Diagonal:
p.H = [4]float32{0: h11, 3: h22}
case blas.OffDiagonal:
p.H = [4]float32{1: h21, 2: h12}
case blas.Rescaling:
p.H = [4]float32{h11, h21, h12, h22}
default:
panic("blas: unexpected blas.Flag")
}
return p, d1, d2, x1
}
// Srot applies a plane transformation.
// x[i] = c * x[i] + s * y[i]
// y[i] = c * y[i] - s * x[i]
//
// Float32 implementations are autogenerated and not directly tested.
func (Implementation) Srot(n int, x []float32, incX int, y []float32, incY int, c float32, s float32) {
if incX == 0 {
panic(zeroIncX)
}
if incY == 0 {
panic(zeroIncY)
}
if n < 1 {
if n == 0 {
return
}
panic(nLT0)
}
if (incX > 0 && (n-1)*incX >= len(x)) || (incX < 0 && (1-n)*incX >= len(x)) {
panic(badX)
}
if (incY > 0 && (n-1)*incY >= len(y)) || (incY < 0 && (1-n)*incY >= len(y)) {
panic(badY)
}
if incX == 1 && incY == 1 {
x = x[:n]
for i, vx := range x {
vy := y[i]
x[i], y[i] = c*vx+s*vy, c*vy-s*vx
}
return
}
var ix, iy int
if incX < 0 {
ix = (-n + 1) * incX
}
if incY < 0 {
iy = (-n + 1) * incY
}
for i := 0; i < n; i++ {
vx := x[ix]
vy := y[iy]
x[ix], y[iy] = c*vx+s*vy, c*vy-s*vx
ix += incX
iy += incY
}
}
// Srotm applies the modified Givens rotation to the 2×n matrix.
//
// Float32 implementations are autogenerated and not directly tested.
func (Implementation) Srotm(n int, x []float32, incX int, y []float32, incY int, p blas.SrotmParams) {
if incX == 0 {
panic(zeroIncX)
}
if incY == 0 {
panic(zeroIncY)
}
if n <= 0 {
if n == 0 {
return
}
panic(nLT0)
}
if (incX > 0 && (n-1)*incX >= len(x)) || (incX < 0 && (1-n)*incX >= len(x)) {
panic(badX)
}
if (incY > 0 && (n-1)*incY >= len(y)) || (incY < 0 && (1-n)*incY >= len(y)) {
panic(badY)
}
if p.Flag == blas.Identity {
return
}
switch p.Flag {
case blas.Rescaling:
h11 := p.H[0]
h12 := p.H[2]
h21 := p.H[1]
h22 := p.H[3]
if incX == 1 && incY == 1 {
x = x[:n]
for i, vx := range x {
vy := y[i]
x[i], y[i] = vx*h11+vy*h12, vx*h21+vy*h22
}
return
}
var ix, iy int
if incX < 0 {
ix = (-n + 1) * incX
}
if incY < 0 {
iy = (-n + 1) * incY
}
for i := 0; i < n; i++ {
vx := x[ix]
vy := y[iy]
x[ix], y[iy] = vx*h11+vy*h12, vx*h21+vy*h22
ix += incX
iy += incY
}
case blas.OffDiagonal:
h12 := p.H[2]
h21 := p.H[1]
if incX == 1 && incY == 1 {
x = x[:n]
for i, vx := range x {
vy := y[i]
x[i], y[i] = vx+vy*h12, vx*h21+vy
}
return
}
var ix, iy int
if incX < 0 {
ix = (-n + 1) * incX
}
if incY < 0 {
iy = (-n + 1) * incY
}
for i := 0; i < n; i++ {
vx := x[ix]
vy := y[iy]
x[ix], y[iy] = vx+vy*h12, vx*h21+vy
ix += incX
iy += incY
}
case blas.Diagonal:
h11 := p.H[0]
h22 := p.H[3]
if incX == 1 && incY == 1 {
x = x[:n]
for i, vx := range x {
vy := y[i]
x[i], y[i] = vx*h11+vy, -vx+vy*h22
}
return
}
var ix, iy int
if incX < 0 {
ix = (-n + 1) * incX
}
if incY < 0 {
iy = (-n + 1) * incY
}
for i := 0; i < n; i++ {
vx := x[ix]
vy := y[iy]
x[ix], y[iy] = vx*h11+vy, -vx+vy*h22
ix += incX
iy += incY
}
}
}
// Sscal scales x by alpha.
// x[i] *= alpha
// Sscal has no effect if incX < 0.
//
// Float32 implementations are autogenerated and not directly tested.
func (Implementation) Sscal(n int, alpha float32, x []float32, incX int) {
if incX < 1 {
if incX == 0 {
panic(zeroIncX)
}
return
}
if (n-1)*incX >= len(x) {
panic(badX)
}
if n < 1 {
if n == 0 {
return
}
panic(nLT0)
}
if alpha == 0 {
if incX == 1 {
x = x[:n]
for i := range x {
x[i] = 0
}
return
}
for ix := 0; ix < n*incX; ix += incX {
x[ix] = 0
}
return
}
if incX == 1 {
f32.ScalUnitary(alpha, x[:n])
return
}
f32.ScalInc(alpha, x, uintptr(n), uintptr(incX))
}
@@ -0,0 +1,53 @@
// Code generated by "go generate gonum.org/v1/gonum/blas/gonum”; DO NOT EDIT.
// Copyright ©2015 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package gonum
import (
"gonum.org/v1/gonum/internal/asm/f32"
)
// Dsdot computes the dot product of the two vectors
// \sum_i x[i]*y[i]
//
// Float32 implementations are autogenerated and not directly tested.
func (Implementation) Dsdot(n int, x []float32, incX int, y []float32, incY int) float64 {
if incX == 0 {
panic(zeroIncX)
}
if incY == 0 {
panic(zeroIncY)
}
if n <= 0 {
if n == 0 {
return 0
}
panic(nLT0)
}
if incX == 1 && incY == 1 {
if len(x) < n {
panic(badX)
}
if len(y) < n {
panic(badY)
}
return f32.DdotUnitary(x[:n], y)
}
var ix, iy int
if incX < 0 {
ix = (-n + 1) * incX
}
if incY < 0 {
iy = (-n + 1) * incY
}
if ix >= len(x) || ix+(n-1)*incX >= len(x) {
panic(badX)
}
if iy >= len(y) || iy+(n-1)*incY >= len(y) {
panic(badY)
}
return f32.DdotInc(x, y, uintptr(n), uintptr(incX), uintptr(incY), uintptr(ix), uintptr(iy))
}
@@ -0,0 +1,53 @@
// Code generated by "go generate gonum.org/v1/gonum/blas/gonum”; DO NOT EDIT.
// Copyright ©2015 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package gonum
import (
"gonum.org/v1/gonum/internal/asm/f32"
)
// Sdot computes the dot product of the two vectors
// \sum_i x[i]*y[i]
//
// Float32 implementations are autogenerated and not directly tested.
func (Implementation) Sdot(n int, x []float32, incX int, y []float32, incY int) float32 {
if incX == 0 {
panic(zeroIncX)
}
if incY == 0 {
panic(zeroIncY)
}
if n <= 0 {
if n == 0 {
return 0
}
panic(nLT0)
}
if incX == 1 && incY == 1 {
if len(x) < n {
panic(badX)
}
if len(y) < n {
panic(badY)
}
return f32.DotUnitary(x[:n], y)
}
var ix, iy int
if incX < 0 {
ix = (-n + 1) * incX
}
if incY < 0 {
iy = (-n + 1) * incY
}
if ix >= len(x) || ix+(n-1)*incX >= len(x) {
panic(badX)
}
if iy >= len(y) || iy+(n-1)*incY >= len(y) {
panic(badY)
}
return f32.DotInc(x, y, uintptr(n), uintptr(incX), uintptr(incY), uintptr(ix), uintptr(iy))
}
@@ -0,0 +1,53 @@
// Code generated by "go generate gonum.org/v1/gonum/blas/gonum”; DO NOT EDIT.
// Copyright ©2015 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package gonum
import (
"gonum.org/v1/gonum/internal/asm/f32"
)
// Sdsdot computes the dot product of the two vectors plus a constant
// alpha + \sum_i x[i]*y[i]
//
// Float32 implementations are autogenerated and not directly tested.
func (Implementation) Sdsdot(n int, alpha float32, x []float32, incX int, y []float32, incY int) float32 {
if incX == 0 {
panic(zeroIncX)
}
if incY == 0 {
panic(zeroIncY)
}
if n <= 0 {
if n == 0 {
return 0
}
panic(nLT0)
}
if incX == 1 && incY == 1 {
if len(x) < n {
panic(badX)
}
if len(y) < n {
panic(badY)
}
return alpha + float32(f32.DdotUnitary(x[:n], y))
}
var ix, iy int
if incX < 0 {
ix = (-n + 1) * incX
}
if incY < 0 {
iy = (-n + 1) * incY
}
if ix >= len(x) || ix+(n-1)*incX >= len(x) {
panic(badX)
}
if iy >= len(y) || iy+(n-1)*incY >= len(y) {
panic(badY)
}
return alpha + float32(f32.DdotInc(x, y, uintptr(n), uintptr(incX), uintptr(incY), uintptr(ix), uintptr(iy)))
}
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
@@ -0,0 +1,833 @@
// Copyright ©2014 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package gonum
import (
"gonum.org/v1/gonum/blas"
"gonum.org/v1/gonum/internal/asm/f64"
)
var _ blas.Float64Level3 = Implementation{}
// Dtrsm solves one of the matrix equations
// A * X = alpha * B if tA == blas.NoTrans and side == blas.Left
// A^T * X = alpha * B if tA == blas.Trans or blas.ConjTrans, and side == blas.Left
// X * A = alpha * B if tA == blas.NoTrans and side == blas.Right
// X * A^T = alpha * B if tA == blas.Trans or blas.ConjTrans, and side == blas.Right
// where A is an n×n or m×m triangular matrix, X and B are m×n matrices, and alpha is a
// scalar.
//
// At entry to the function, X contains the values of B, and the result is
// stored in-place into X.
//
// No check is made that A is invertible.
func (Implementation) Dtrsm(s blas.Side, ul blas.Uplo, tA blas.Transpose, d blas.Diag, m, n int, alpha float64, a []float64, lda int, b []float64, ldb int) {
if s != blas.Left && s != blas.Right {
panic(badSide)
}
if ul != blas.Lower && ul != blas.Upper {
panic(badUplo)
}
if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans {
panic(badTranspose)
}
if d != blas.NonUnit && d != blas.Unit {
panic(badDiag)
}
if m < 0 {
panic(mLT0)
}
if n < 0 {
panic(nLT0)
}
if ldb < n {
panic(badLdB)
}
var k int
if s == blas.Left {
k = m
} else {
k = n
}
if lda*(k-1)+k > len(a) || lda < max(1, k) {
panic(badLdA)
}
if ldb*(m-1)+n > len(b) || ldb < max(1, n) {
panic(badLdB)
}
if m == 0 || n == 0 {
return
}
if alpha == 0 {
for i := 0; i < m; i++ {
btmp := b[i*ldb : i*ldb+n]
for j := range btmp {
btmp[j] = 0
}
}
return
}
nonUnit := d == blas.NonUnit
if s == blas.Left {
if tA == blas.NoTrans {
if ul == blas.Upper {
for i := m - 1; i >= 0; i-- {
btmp := b[i*ldb : i*ldb+n]
if alpha != 1 {
for j := range btmp {
btmp[j] *= alpha
}
}
for ka, va := range a[i*lda+i+1 : i*lda+m] {
k := ka + i + 1
if va != 0 {
f64.AxpyUnitaryTo(btmp, -va, b[k*ldb:k*ldb+n], btmp)
}
}
if nonUnit {
tmp := 1 / a[i*lda+i]
for j := 0; j < n; j++ {
btmp[j] *= tmp
}
}
}
return
}
for i := 0; i < m; i++ {
btmp := b[i*ldb : i*ldb+n]
if alpha != 1 {
for j := 0; j < n; j++ {
btmp[j] *= alpha
}
}
for k, va := range a[i*lda : i*lda+i] {
if va != 0 {
f64.AxpyUnitaryTo(btmp, -va, b[k*ldb:k*ldb+n], btmp)
}
}
if nonUnit {
tmp := 1 / a[i*lda+i]
for j := 0; j < n; j++ {
btmp[j] *= tmp
}
}
}
return
}
// Cases where a is transposed
if ul == blas.Upper {
for k := 0; k < m; k++ {
btmpk := b[k*ldb : k*ldb+n]
if nonUnit {
tmp := 1 / a[k*lda+k]
for j := 0; j < n; j++ {
btmpk[j] *= tmp
}
}
for ia, va := range a[k*lda+k+1 : k*lda+m] {
i := ia + k + 1
if va != 0 {
btmp := b[i*ldb : i*ldb+n]
f64.AxpyUnitaryTo(btmp, -va, btmpk, btmp)
}
}
if alpha != 1 {
for j := 0; j < n; j++ {
btmpk[j] *= alpha
}
}
}
return
}
for k := m - 1; k >= 0; k-- {
btmpk := b[k*ldb : k*ldb+n]
if nonUnit {
tmp := 1 / a[k*lda+k]
for j := 0; j < n; j++ {
btmpk[j] *= tmp
}
}
for i, va := range a[k*lda : k*lda+k] {
if va != 0 {
btmp := b[i*ldb : i*ldb+n]
f64.AxpyUnitaryTo(btmp, -va, btmpk, btmp)
}
}
if alpha != 1 {
for j := 0; j < n; j++ {
btmpk[j] *= alpha
}
}
}
return
}
// Cases where a is to the right of X.
if tA == blas.NoTrans {
if ul == blas.Upper {
for i := 0; i < m; i++ {
btmp := b[i*ldb : i*ldb+n]
if alpha != 1 {
for j := 0; j < n; j++ {
btmp[j] *= alpha
}
}
for k, vb := range btmp {
if vb != 0 {
if btmp[k] != 0 {
if nonUnit {
btmp[k] /= a[k*lda+k]
}
btmpk := btmp[k+1 : n]
f64.AxpyUnitaryTo(btmpk, -btmp[k], a[k*lda+k+1:k*lda+n], btmpk)
}
}
}
}
return
}
for i := 0; i < m; i++ {
btmp := b[i*lda : i*lda+n]
if alpha != 1 {
for j := 0; j < n; j++ {
btmp[j] *= alpha
}
}
for k := n - 1; k >= 0; k-- {
if btmp[k] != 0 {
if nonUnit {
btmp[k] /= a[k*lda+k]
}
f64.AxpyUnitaryTo(btmp, -btmp[k], a[k*lda:k*lda+k], btmp)
}
}
}
return
}
// Cases where a is transposed.
if ul == blas.Upper {
for i := 0; i < m; i++ {
btmp := b[i*lda : i*lda+n]
for j := n - 1; j >= 0; j-- {
tmp := alpha*btmp[j] - f64.DotUnitary(a[j*lda+j+1:j*lda+n], btmp[j+1:])
if nonUnit {
tmp /= a[j*lda+j]
}
btmp[j] = tmp
}
}
return
}
for i := 0; i < m; i++ {
btmp := b[i*lda : i*lda+n]
for j := 0; j < n; j++ {
tmp := alpha*btmp[j] - f64.DotUnitary(a[j*lda:j*lda+j], btmp)
if nonUnit {
tmp /= a[j*lda+j]
}
btmp[j] = tmp
}
}
}
// Dsymm performs one of the matrix-matrix operations
// C = alpha * A * B + beta * C if side == blas.Left
// C = alpha * B * A + beta * C if side == blas.Right
// where A is an n×n or m×m symmetric matrix, B and C are m×n matrices, and alpha
// is a scalar.
func (Implementation) Dsymm(s blas.Side, ul blas.Uplo, m, n int, alpha float64, a []float64, lda int, b []float64, ldb int, beta float64, c []float64, ldc int) {
if s != blas.Right && s != blas.Left {
panic("goblas: bad side")
}
if ul != blas.Lower && ul != blas.Upper {
panic(badUplo)
}
if m < 0 {
panic(mLT0)
}
if n < 0 {
panic(nLT0)
}
var k int
if s == blas.Left {
k = m
} else {
k = n
}
if lda*(k-1)+k > len(a) || lda < max(1, k) {
panic(badLdA)
}
if ldb*(m-1)+n > len(b) || ldb < max(1, n) {
panic(badLdB)
}
if ldc*(m-1)+n > len(c) || ldc < max(1, n) {
panic(badLdC)
}
if m == 0 || n == 0 {
return
}
if alpha == 0 && beta == 1 {
return
}
if alpha == 0 {
if beta == 0 {
for i := 0; i < m; i++ {
ctmp := c[i*ldc : i*ldc+n]
for j := range ctmp {
ctmp[j] = 0
}
}
return
}
for i := 0; i < m; i++ {
ctmp := c[i*ldc : i*ldc+n]
for j := 0; j < n; j++ {
ctmp[j] *= beta
}
}
return
}
isUpper := ul == blas.Upper
if s == blas.Left {
for i := 0; i < m; i++ {
atmp := alpha * a[i*lda+i]
btmp := b[i*ldb : i*ldb+n]
ctmp := c[i*ldc : i*ldc+n]
for j, v := range btmp {
ctmp[j] *= beta
ctmp[j] += atmp * v
}
for k := 0; k < i; k++ {
var atmp float64
if isUpper {
atmp = a[k*lda+i]
} else {
atmp = a[i*lda+k]
}
atmp *= alpha
ctmp := c[i*ldc : i*ldc+n]
f64.AxpyUnitaryTo(ctmp, atmp, b[k*ldb:k*ldb+n], ctmp)
}
for k := i + 1; k < m; k++ {
var atmp float64
if isUpper {
atmp = a[i*lda+k]
} else {
atmp = a[k*lda+i]
}
atmp *= alpha
ctmp := c[i*ldc : i*ldc+n]
f64.AxpyUnitaryTo(ctmp, atmp, b[k*ldb:k*ldb+n], ctmp)
}
}
return
}
if isUpper {
for i := 0; i < m; i++ {
for j := n - 1; j >= 0; j-- {
tmp := alpha * b[i*ldb+j]
var tmp2 float64
atmp := a[j*lda+j+1 : j*lda+n]
btmp := b[i*ldb+j+1 : i*ldb+n]
ctmp := c[i*ldc+j+1 : i*ldc+n]
for k, v := range atmp {
ctmp[k] += tmp * v
tmp2 += btmp[k] * v
}
c[i*ldc+j] *= beta
c[i*ldc+j] += tmp*a[j*lda+j] + alpha*tmp2
}
}
return
}
for i := 0; i < m; i++ {
for j := 0; j < n; j++ {
tmp := alpha * b[i*ldb+j]
var tmp2 float64
atmp := a[j*lda : j*lda+j]
btmp := b[i*ldb : i*ldb+j]
ctmp := c[i*ldc : i*ldc+j]
for k, v := range atmp {
ctmp[k] += tmp * v
tmp2 += btmp[k] * v
}
c[i*ldc+j] *= beta
c[i*ldc+j] += tmp*a[j*lda+j] + alpha*tmp2
}
}
}
// Dsyrk performs one of the symmetric rank-k operations
// C = alpha * A * A^T + beta * C if tA == blas.NoTrans
// C = alpha * A^T * A + beta * C if tA == blas.Trans or tA == blas.ConjTrans
// where A is an n×k or k×n matrix, C is an n×n symmetric matrix, and alpha and
// beta are scalars.
func (Implementation) Dsyrk(ul blas.Uplo, tA blas.Transpose, n, k int, alpha float64, a []float64, lda int, beta float64, c []float64, ldc int) {
if ul != blas.Lower && ul != blas.Upper {
panic(badUplo)
}
if tA != blas.Trans && tA != blas.NoTrans && tA != blas.ConjTrans {
panic(badTranspose)
}
if n < 0 {
panic(nLT0)
}
if k < 0 {
panic(kLT0)
}
if ldc < n {
panic(badLdC)
}
var row, col int
if tA == blas.NoTrans {
row, col = n, k
} else {
row, col = k, n
}
if lda*(row-1)+col > len(a) || lda < max(1, col) {
panic(badLdA)
}
if ldc*(n-1)+n > len(c) || ldc < max(1, n) {
panic(badLdC)
}
if alpha == 0 {
if beta == 0 {
if ul == blas.Upper {
for i := 0; i < n; i++ {
ctmp := c[i*ldc+i : i*ldc+n]
for j := range ctmp {
ctmp[j] = 0
}
}
return
}
for i := 0; i < n; i++ {
ctmp := c[i*ldc : i*ldc+i+1]
for j := range ctmp {
ctmp[j] = 0
}
}
return
}
if ul == blas.Upper {
for i := 0; i < n; i++ {
ctmp := c[i*ldc+i : i*ldc+n]
for j := range ctmp {
ctmp[j] *= beta
}
}
return
}
for i := 0; i < n; i++ {
ctmp := c[i*ldc : i*ldc+i+1]
for j := range ctmp {
ctmp[j] *= beta
}
}
return
}
if tA == blas.NoTrans {
if ul == blas.Upper {
for i := 0; i < n; i++ {
ctmp := c[i*ldc+i : i*ldc+n]
atmp := a[i*lda : i*lda+k]
for jc, vc := range ctmp {
j := jc + i
ctmp[jc] = vc*beta + alpha*f64.DotUnitary(atmp, a[j*lda:j*lda+k])
}
}
return
}
for i := 0; i < n; i++ {
atmp := a[i*lda : i*lda+k]
for j, vc := range c[i*ldc : i*ldc+i+1] {
c[i*ldc+j] = vc*beta + alpha*f64.DotUnitary(a[j*lda:j*lda+k], atmp)
}
}
return
}
// Cases where a is transposed.
if ul == blas.Upper {
for i := 0; i < n; i++ {
ctmp := c[i*ldc+i : i*ldc+n]
if beta != 1 {
for j := range ctmp {
ctmp[j] *= beta
}
}
for l := 0; l < k; l++ {
tmp := alpha * a[l*lda+i]
if tmp != 0 {
f64.AxpyUnitaryTo(ctmp, tmp, a[l*lda+i:l*lda+n], ctmp)
}
}
}
return
}
for i := 0; i < n; i++ {
ctmp := c[i*ldc : i*ldc+i+1]
if beta != 0 {
for j := range ctmp {
ctmp[j] *= beta
}
}
for l := 0; l < k; l++ {
tmp := alpha * a[l*lda+i]
if tmp != 0 {
f64.AxpyUnitaryTo(ctmp, tmp, a[l*lda:l*lda+i+1], ctmp)
}
}
}
}
// Dsyr2k performs one of the symmetric rank 2k operations
// C = alpha * A * B^T + alpha * B * A^T + beta * C if tA == blas.NoTrans
// C = alpha * A^T * B + alpha * B^T * A + beta * C if tA == blas.Trans or tA == blas.ConjTrans
// where A and B are n×k or k×n matrices, C is an n×n symmetric matrix, and
// alpha and beta are scalars.
func (Implementation) Dsyr2k(ul blas.Uplo, tA blas.Transpose, n, k int, alpha float64, a []float64, lda int, b []float64, ldb int, beta float64, c []float64, ldc int) {
if ul != blas.Lower && ul != blas.Upper {
panic(badUplo)
}
if tA != blas.Trans && tA != blas.NoTrans && tA != blas.ConjTrans {
panic(badTranspose)
}
if n < 0 {
panic(nLT0)
}
if k < 0 {
panic(kLT0)
}
if ldc < n {
panic(badLdC)
}
var row, col int
if tA == blas.NoTrans {
row, col = n, k
} else {
row, col = k, n
}
if lda*(row-1)+col > len(a) || lda < max(1, col) {
panic(badLdA)
}
if ldb*(row-1)+col > len(b) || ldb < max(1, col) {
panic(badLdB)
}
if ldc*(n-1)+n > len(c) || ldc < max(1, n) {
panic(badLdC)
}
if alpha == 0 {
if beta == 0 {
if ul == blas.Upper {
for i := 0; i < n; i++ {
ctmp := c[i*ldc+i : i*ldc+n]
for j := range ctmp {
ctmp[j] = 0
}
}
return
}
for i := 0; i < n; i++ {
ctmp := c[i*ldc : i*ldc+i+1]
for j := range ctmp {
ctmp[j] = 0
}
}
return
}
if ul == blas.Upper {
for i := 0; i < n; i++ {
ctmp := c[i*ldc+i : i*ldc+n]
for j := range ctmp {
ctmp[j] *= beta
}
}
return
}
for i := 0; i < n; i++ {
ctmp := c[i*ldc : i*ldc+i+1]
for j := range ctmp {
ctmp[j] *= beta
}
}
return
}
if tA == blas.NoTrans {
if ul == blas.Upper {
for i := 0; i < n; i++ {
atmp := a[i*lda : i*lda+k]
btmp := b[i*ldb : i*ldb+k]
ctmp := c[i*ldc+i : i*ldc+n]
for jc := range ctmp {
j := i + jc
var tmp1, tmp2 float64
binner := b[j*ldb : j*ldb+k]
for l, v := range a[j*lda : j*lda+k] {
tmp1 += v * btmp[l]
tmp2 += atmp[l] * binner[l]
}
ctmp[jc] *= beta
ctmp[jc] += alpha * (tmp1 + tmp2)
}
}
return
}
for i := 0; i < n; i++ {
atmp := a[i*lda : i*lda+k]
btmp := b[i*ldb : i*ldb+k]
ctmp := c[i*ldc : i*ldc+i+1]
for j := 0; j <= i; j++ {
var tmp1, tmp2 float64
binner := b[j*ldb : j*ldb+k]
for l, v := range a[j*lda : j*lda+k] {
tmp1 += v * btmp[l]
tmp2 += atmp[l] * binner[l]
}
ctmp[j] *= beta
ctmp[j] += alpha * (tmp1 + tmp2)
}
}
return
}
if ul == blas.Upper {
for i := 0; i < n; i++ {
ctmp := c[i*ldc+i : i*ldc+n]
if beta != 1 {
for j := range ctmp {
ctmp[j] *= beta
}
}
for l := 0; l < k; l++ {
tmp1 := alpha * b[l*lda+i]
tmp2 := alpha * a[l*lda+i]
btmp := b[l*ldb+i : l*ldb+n]
if tmp1 != 0 || tmp2 != 0 {
for j, v := range a[l*lda+i : l*lda+n] {
ctmp[j] += v*tmp1 + btmp[j]*tmp2
}
}
}
}
return
}
for i := 0; i < n; i++ {
ctmp := c[i*ldc : i*ldc+i+1]
if beta != 1 {
for j := range ctmp {
ctmp[j] *= beta
}
}
for l := 0; l < k; l++ {
tmp1 := alpha * b[l*lda+i]
tmp2 := alpha * a[l*lda+i]
btmp := b[l*ldb : l*ldb+i+1]
if tmp1 != 0 || tmp2 != 0 {
for j, v := range a[l*lda : l*lda+i+1] {
ctmp[j] += v*tmp1 + btmp[j]*tmp2
}
}
}
}
}
// Dtrmm performs one of the matrix-matrix operations
// B = alpha * A * B if tA == blas.NoTrans and side == blas.Left
// B = alpha * A^T * B if tA == blas.Trans or blas.ConjTrans, and side == blas.Left
// B = alpha * B * A if tA == blas.NoTrans and side == blas.Right
// B = alpha * B * A^T if tA == blas.Trans or blas.ConjTrans, and side == blas.Right
// where A is an n×n or m×m triangular matrix, B is an m×n matrix, and alpha is a scalar.
func (Implementation) Dtrmm(s blas.Side, ul blas.Uplo, tA blas.Transpose, d blas.Diag, m, n int, alpha float64, a []float64, lda int, b []float64, ldb int) {
if s != blas.Left && s != blas.Right {
panic(badSide)
}
if ul != blas.Lower && ul != blas.Upper {
panic(badUplo)
}
if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans {
panic(badTranspose)
}
if d != blas.NonUnit && d != blas.Unit {
panic(badDiag)
}
if m < 0 {
panic(mLT0)
}
if n < 0 {
panic(nLT0)
}
var k int
if s == blas.Left {
k = m
} else {
k = n
}
if lda*(k-1)+k > len(a) || lda < max(1, k) {
panic(badLdA)
}
if ldb*(m-1)+n > len(b) || ldb < max(1, n) {
panic(badLdB)
}
if alpha == 0 {
for i := 0; i < m; i++ {
btmp := b[i*ldb : i*ldb+n]
for j := range btmp {
btmp[j] = 0
}
}
return
}
nonUnit := d == blas.NonUnit
if s == blas.Left {
if tA == blas.NoTrans {
if ul == blas.Upper {
for i := 0; i < m; i++ {
tmp := alpha
if nonUnit {
tmp *= a[i*lda+i]
}
btmp := b[i*ldb : i*ldb+n]
for j := range btmp {
btmp[j] *= tmp
}
for ka, va := range a[i*lda+i+1 : i*lda+m] {
k := ka + i + 1
tmp := alpha * va
if tmp != 0 {
f64.AxpyUnitaryTo(btmp, tmp, b[k*ldb:k*ldb+n], btmp)
}
}
}
return
}
for i := m - 1; i >= 0; i-- {
tmp := alpha
if nonUnit {
tmp *= a[i*lda+i]
}
btmp := b[i*ldb : i*ldb+n]
for j := range btmp {
btmp[j] *= tmp
}
for k, va := range a[i*lda : i*lda+i] {
tmp := alpha * va
if tmp != 0 {
f64.AxpyUnitaryTo(btmp, tmp, b[k*ldb:k*ldb+n], btmp)
}
}
}
return
}
// Cases where a is transposed.
if ul == blas.Upper {
for k := m - 1; k >= 0; k-- {
btmpk := b[k*ldb : k*ldb+n]
for ia, va := range a[k*lda+k+1 : k*lda+m] {
i := ia + k + 1
btmp := b[i*ldb : i*ldb+n]
tmp := alpha * va
if tmp != 0 {
f64.AxpyUnitaryTo(btmp, tmp, btmpk, btmp)
}
}
tmp := alpha
if nonUnit {
tmp *= a[k*lda+k]
}
if tmp != 1 {
for j := 0; j < n; j++ {
btmpk[j] *= tmp
}
}
}
return
}
for k := 0; k < m; k++ {
btmpk := b[k*ldb : k*ldb+n]
for i, va := range a[k*lda : k*lda+k] {
btmp := b[i*ldb : i*ldb+n]
tmp := alpha * va
if tmp != 0 {
f64.AxpyUnitaryTo(btmp, tmp, btmpk, btmp)
}
}
tmp := alpha
if nonUnit {
tmp *= a[k*lda+k]
}
if tmp != 1 {
for j := 0; j < n; j++ {
btmpk[j] *= tmp
}
}
}
return
}
// Cases where a is on the right
if tA == blas.NoTrans {
if ul == blas.Upper {
for i := 0; i < m; i++ {
btmp := b[i*ldb : i*ldb+n]
for k := n - 1; k >= 0; k-- {
tmp := alpha * btmp[k]
if tmp != 0 {
btmp[k] = tmp
if nonUnit {
btmp[k] *= a[k*lda+k]
}
for ja, v := range a[k*lda+k+1 : k*lda+n] {
j := ja + k + 1
btmp[j] += tmp * v
}
}
}
}
return
}
for i := 0; i < m; i++ {
btmp := b[i*ldb : i*ldb+n]
for k := 0; k < n; k++ {
tmp := alpha * btmp[k]
if tmp != 0 {
btmp[k] = tmp
if nonUnit {
btmp[k] *= a[k*lda+k]
}
f64.AxpyUnitaryTo(btmp, tmp, a[k*lda:k*lda+k], btmp)
}
}
}
return
}
// Cases where a is transposed.
if ul == blas.Upper {
for i := 0; i < m; i++ {
btmp := b[i*ldb : i*ldb+n]
for j, vb := range btmp {
tmp := vb
if nonUnit {
tmp *= a[j*lda+j]
}
tmp += f64.DotUnitary(a[j*lda+j+1:j*lda+n], btmp[j+1:n])
btmp[j] = alpha * tmp
}
}
return
}
for i := 0; i < m; i++ {
btmp := b[i*ldb : i*ldb+n]
for j := n - 1; j >= 0; j-- {
tmp := btmp[j]
if nonUnit {
tmp *= a[j*lda+j]
}
tmp += f64.DotUnitary(a[j*lda:j*lda+j], btmp[:j])
btmp[j] = alpha * tmp
}
}
}
@@ -0,0 +1,845 @@
// Code generated by "go generate gonum.org/v1/gonum/blas/gonum”; DO NOT EDIT.
// Copyright ©2014 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package gonum
import (
"gonum.org/v1/gonum/blas"
"gonum.org/v1/gonum/internal/asm/f32"
)
var _ blas.Float32Level3 = Implementation{}
// Strsm solves one of the matrix equations
// A * X = alpha * B if tA == blas.NoTrans and side == blas.Left
// A^T * X = alpha * B if tA == blas.Trans or blas.ConjTrans, and side == blas.Left
// X * A = alpha * B if tA == blas.NoTrans and side == blas.Right
// X * A^T = alpha * B if tA == blas.Trans or blas.ConjTrans, and side == blas.Right
// where A is an n×n or m×m triangular matrix, X and B are m×n matrices, and alpha is a
// scalar.
//
// At entry to the function, X contains the values of B, and the result is
// stored in-place into X.
//
// No check is made that A is invertible.
//
// Float32 implementations are autogenerated and not directly tested.
func (Implementation) Strsm(s blas.Side, ul blas.Uplo, tA blas.Transpose, d blas.Diag, m, n int, alpha float32, a []float32, lda int, b []float32, ldb int) {
if s != blas.Left && s != blas.Right {
panic(badSide)
}
if ul != blas.Lower && ul != blas.Upper {
panic(badUplo)
}
if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans {
panic(badTranspose)
}
if d != blas.NonUnit && d != blas.Unit {
panic(badDiag)
}
if m < 0 {
panic(mLT0)
}
if n < 0 {
panic(nLT0)
}
if ldb < n {
panic(badLdB)
}
var k int
if s == blas.Left {
k = m
} else {
k = n
}
if lda*(k-1)+k > len(a) || lda < max(1, k) {
panic(badLdA)
}
if ldb*(m-1)+n > len(b) || ldb < max(1, n) {
panic(badLdB)
}
if m == 0 || n == 0 {
return
}
if alpha == 0 {
for i := 0; i < m; i++ {
btmp := b[i*ldb : i*ldb+n]
for j := range btmp {
btmp[j] = 0
}
}
return
}
nonUnit := d == blas.NonUnit
if s == blas.Left {
if tA == blas.NoTrans {
if ul == blas.Upper {
for i := m - 1; i >= 0; i-- {
btmp := b[i*ldb : i*ldb+n]
if alpha != 1 {
for j := range btmp {
btmp[j] *= alpha
}
}
for ka, va := range a[i*lda+i+1 : i*lda+m] {
k := ka + i + 1
if va != 0 {
f32.AxpyUnitaryTo(btmp, -va, b[k*ldb:k*ldb+n], btmp)
}
}
if nonUnit {
tmp := 1 / a[i*lda+i]
for j := 0; j < n; j++ {
btmp[j] *= tmp
}
}
}
return
}
for i := 0; i < m; i++ {
btmp := b[i*ldb : i*ldb+n]
if alpha != 1 {
for j := 0; j < n; j++ {
btmp[j] *= alpha
}
}
for k, va := range a[i*lda : i*lda+i] {
if va != 0 {
f32.AxpyUnitaryTo(btmp, -va, b[k*ldb:k*ldb+n], btmp)
}
}
if nonUnit {
tmp := 1 / a[i*lda+i]
for j := 0; j < n; j++ {
btmp[j] *= tmp
}
}
}
return
}
// Cases where a is transposed
if ul == blas.Upper {
for k := 0; k < m; k++ {
btmpk := b[k*ldb : k*ldb+n]
if nonUnit {
tmp := 1 / a[k*lda+k]
for j := 0; j < n; j++ {
btmpk[j] *= tmp
}
}
for ia, va := range a[k*lda+k+1 : k*lda+m] {
i := ia + k + 1
if va != 0 {
btmp := b[i*ldb : i*ldb+n]
f32.AxpyUnitaryTo(btmp, -va, btmpk, btmp)
}
}
if alpha != 1 {
for j := 0; j < n; j++ {
btmpk[j] *= alpha
}
}
}
return
}
for k := m - 1; k >= 0; k-- {
btmpk := b[k*ldb : k*ldb+n]
if nonUnit {
tmp := 1 / a[k*lda+k]
for j := 0; j < n; j++ {
btmpk[j] *= tmp
}
}
for i, va := range a[k*lda : k*lda+k] {
if va != 0 {
btmp := b[i*ldb : i*ldb+n]
f32.AxpyUnitaryTo(btmp, -va, btmpk, btmp)
}
}
if alpha != 1 {
for j := 0; j < n; j++ {
btmpk[j] *= alpha
}
}
}
return
}
// Cases where a is to the right of X.
if tA == blas.NoTrans {
if ul == blas.Upper {
for i := 0; i < m; i++ {
btmp := b[i*ldb : i*ldb+n]
if alpha != 1 {
for j := 0; j < n; j++ {
btmp[j] *= alpha
}
}
for k, vb := range btmp {
if vb != 0 {
if btmp[k] != 0 {
if nonUnit {
btmp[k] /= a[k*lda+k]
}
btmpk := btmp[k+1 : n]
f32.AxpyUnitaryTo(btmpk, -btmp[k], a[k*lda+k+1:k*lda+n], btmpk)
}
}
}
}
return
}
for i := 0; i < m; i++ {
btmp := b[i*lda : i*lda+n]
if alpha != 1 {
for j := 0; j < n; j++ {
btmp[j] *= alpha
}
}
for k := n - 1; k >= 0; k-- {
if btmp[k] != 0 {
if nonUnit {
btmp[k] /= a[k*lda+k]
}
f32.AxpyUnitaryTo(btmp, -btmp[k], a[k*lda:k*lda+k], btmp)
}
}
}
return
}
// Cases where a is transposed.
if ul == blas.Upper {
for i := 0; i < m; i++ {
btmp := b[i*lda : i*lda+n]
for j := n - 1; j >= 0; j-- {
tmp := alpha*btmp[j] - f32.DotUnitary(a[j*lda+j+1:j*lda+n], btmp[j+1:])
if nonUnit {
tmp /= a[j*lda+j]
}
btmp[j] = tmp
}
}
return
}
for i := 0; i < m; i++ {
btmp := b[i*lda : i*lda+n]
for j := 0; j < n; j++ {
tmp := alpha*btmp[j] - f32.DotUnitary(a[j*lda:j*lda+j], btmp)
if nonUnit {
tmp /= a[j*lda+j]
}
btmp[j] = tmp
}
}
}
// Ssymm performs one of the matrix-matrix operations
// C = alpha * A * B + beta * C if side == blas.Left
// C = alpha * B * A + beta * C if side == blas.Right
// where A is an n×n or m×m symmetric matrix, B and C are m×n matrices, and alpha
// is a scalar.
//
// Float32 implementations are autogenerated and not directly tested.
func (Implementation) Ssymm(s blas.Side, ul blas.Uplo, m, n int, alpha float32, a []float32, lda int, b []float32, ldb int, beta float32, c []float32, ldc int) {
if s != blas.Right && s != blas.Left {
panic("goblas: bad side")
}
if ul != blas.Lower && ul != blas.Upper {
panic(badUplo)
}
if m < 0 {
panic(mLT0)
}
if n < 0 {
panic(nLT0)
}
var k int
if s == blas.Left {
k = m
} else {
k = n
}
if lda*(k-1)+k > len(a) || lda < max(1, k) {
panic(badLdA)
}
if ldb*(m-1)+n > len(b) || ldb < max(1, n) {
panic(badLdB)
}
if ldc*(m-1)+n > len(c) || ldc < max(1, n) {
panic(badLdC)
}
if m == 0 || n == 0 {
return
}
if alpha == 0 && beta == 1 {
return
}
if alpha == 0 {
if beta == 0 {
for i := 0; i < m; i++ {
ctmp := c[i*ldc : i*ldc+n]
for j := range ctmp {
ctmp[j] = 0
}
}
return
}
for i := 0; i < m; i++ {
ctmp := c[i*ldc : i*ldc+n]
for j := 0; j < n; j++ {
ctmp[j] *= beta
}
}
return
}
isUpper := ul == blas.Upper
if s == blas.Left {
for i := 0; i < m; i++ {
atmp := alpha * a[i*lda+i]
btmp := b[i*ldb : i*ldb+n]
ctmp := c[i*ldc : i*ldc+n]
for j, v := range btmp {
ctmp[j] *= beta
ctmp[j] += atmp * v
}
for k := 0; k < i; k++ {
var atmp float32
if isUpper {
atmp = a[k*lda+i]
} else {
atmp = a[i*lda+k]
}
atmp *= alpha
ctmp := c[i*ldc : i*ldc+n]
f32.AxpyUnitaryTo(ctmp, atmp, b[k*ldb:k*ldb+n], ctmp)
}
for k := i + 1; k < m; k++ {
var atmp float32
if isUpper {
atmp = a[i*lda+k]
} else {
atmp = a[k*lda+i]
}
atmp *= alpha
ctmp := c[i*ldc : i*ldc+n]
f32.AxpyUnitaryTo(ctmp, atmp, b[k*ldb:k*ldb+n], ctmp)
}
}
return
}
if isUpper {
for i := 0; i < m; i++ {
for j := n - 1; j >= 0; j-- {
tmp := alpha * b[i*ldb+j]
var tmp2 float32
atmp := a[j*lda+j+1 : j*lda+n]
btmp := b[i*ldb+j+1 : i*ldb+n]
ctmp := c[i*ldc+j+1 : i*ldc+n]
for k, v := range atmp {
ctmp[k] += tmp * v
tmp2 += btmp[k] * v
}
c[i*ldc+j] *= beta
c[i*ldc+j] += tmp*a[j*lda+j] + alpha*tmp2
}
}
return
}
for i := 0; i < m; i++ {
for j := 0; j < n; j++ {
tmp := alpha * b[i*ldb+j]
var tmp2 float32
atmp := a[j*lda : j*lda+j]
btmp := b[i*ldb : i*ldb+j]
ctmp := c[i*ldc : i*ldc+j]
for k, v := range atmp {
ctmp[k] += tmp * v
tmp2 += btmp[k] * v
}
c[i*ldc+j] *= beta
c[i*ldc+j] += tmp*a[j*lda+j] + alpha*tmp2
}
}
}
// Ssyrk performs one of the symmetric rank-k operations
// C = alpha * A * A^T + beta * C if tA == blas.NoTrans
// C = alpha * A^T * A + beta * C if tA == blas.Trans or tA == blas.ConjTrans
// where A is an n×k or k×n matrix, C is an n×n symmetric matrix, and alpha and
// beta are scalars.
//
// Float32 implementations are autogenerated and not directly tested.
func (Implementation) Ssyrk(ul blas.Uplo, tA blas.Transpose, n, k int, alpha float32, a []float32, lda int, beta float32, c []float32, ldc int) {
if ul != blas.Lower && ul != blas.Upper {
panic(badUplo)
}
if tA != blas.Trans && tA != blas.NoTrans && tA != blas.ConjTrans {
panic(badTranspose)
}
if n < 0 {
panic(nLT0)
}
if k < 0 {
panic(kLT0)
}
if ldc < n {
panic(badLdC)
}
var row, col int
if tA == blas.NoTrans {
row, col = n, k
} else {
row, col = k, n
}
if lda*(row-1)+col > len(a) || lda < max(1, col) {
panic(badLdA)
}
if ldc*(n-1)+n > len(c) || ldc < max(1, n) {
panic(badLdC)
}
if alpha == 0 {
if beta == 0 {
if ul == blas.Upper {
for i := 0; i < n; i++ {
ctmp := c[i*ldc+i : i*ldc+n]
for j := range ctmp {
ctmp[j] = 0
}
}
return
}
for i := 0; i < n; i++ {
ctmp := c[i*ldc : i*ldc+i+1]
for j := range ctmp {
ctmp[j] = 0
}
}
return
}
if ul == blas.Upper {
for i := 0; i < n; i++ {
ctmp := c[i*ldc+i : i*ldc+n]
for j := range ctmp {
ctmp[j] *= beta
}
}
return
}
for i := 0; i < n; i++ {
ctmp := c[i*ldc : i*ldc+i+1]
for j := range ctmp {
ctmp[j] *= beta
}
}
return
}
if tA == blas.NoTrans {
if ul == blas.Upper {
for i := 0; i < n; i++ {
ctmp := c[i*ldc+i : i*ldc+n]
atmp := a[i*lda : i*lda+k]
for jc, vc := range ctmp {
j := jc + i
ctmp[jc] = vc*beta + alpha*f32.DotUnitary(atmp, a[j*lda:j*lda+k])
}
}
return
}
for i := 0; i < n; i++ {
atmp := a[i*lda : i*lda+k]
for j, vc := range c[i*ldc : i*ldc+i+1] {
c[i*ldc+j] = vc*beta + alpha*f32.DotUnitary(a[j*lda:j*lda+k], atmp)
}
}
return
}
// Cases where a is transposed.
if ul == blas.Upper {
for i := 0; i < n; i++ {
ctmp := c[i*ldc+i : i*ldc+n]
if beta != 1 {
for j := range ctmp {
ctmp[j] *= beta
}
}
for l := 0; l < k; l++ {
tmp := alpha * a[l*lda+i]
if tmp != 0 {
f32.AxpyUnitaryTo(ctmp, tmp, a[l*lda+i:l*lda+n], ctmp)
}
}
}
return
}
for i := 0; i < n; i++ {
ctmp := c[i*ldc : i*ldc+i+1]
if beta != 0 {
for j := range ctmp {
ctmp[j] *= beta
}
}
for l := 0; l < k; l++ {
tmp := alpha * a[l*lda+i]
if tmp != 0 {
f32.AxpyUnitaryTo(ctmp, tmp, a[l*lda:l*lda+i+1], ctmp)
}
}
}
}
// Ssyr2k performs one of the symmetric rank 2k operations
// C = alpha * A * B^T + alpha * B * A^T + beta * C if tA == blas.NoTrans
// C = alpha * A^T * B + alpha * B^T * A + beta * C if tA == blas.Trans or tA == blas.ConjTrans
// where A and B are n×k or k×n matrices, C is an n×n symmetric matrix, and
// alpha and beta are scalars.
//
// Float32 implementations are autogenerated and not directly tested.
func (Implementation) Ssyr2k(ul blas.Uplo, tA blas.Transpose, n, k int, alpha float32, a []float32, lda int, b []float32, ldb int, beta float32, c []float32, ldc int) {
if ul != blas.Lower && ul != blas.Upper {
panic(badUplo)
}
if tA != blas.Trans && tA != blas.NoTrans && tA != blas.ConjTrans {
panic(badTranspose)
}
if n < 0 {
panic(nLT0)
}
if k < 0 {
panic(kLT0)
}
if ldc < n {
panic(badLdC)
}
var row, col int
if tA == blas.NoTrans {
row, col = n, k
} else {
row, col = k, n
}
if lda*(row-1)+col > len(a) || lda < max(1, col) {
panic(badLdA)
}
if ldb*(row-1)+col > len(b) || ldb < max(1, col) {
panic(badLdB)
}
if ldc*(n-1)+n > len(c) || ldc < max(1, n) {
panic(badLdC)
}
if alpha == 0 {
if beta == 0 {
if ul == blas.Upper {
for i := 0; i < n; i++ {
ctmp := c[i*ldc+i : i*ldc+n]
for j := range ctmp {
ctmp[j] = 0
}
}
return
}
for i := 0; i < n; i++ {
ctmp := c[i*ldc : i*ldc+i+1]
for j := range ctmp {
ctmp[j] = 0
}
}
return
}
if ul == blas.Upper {
for i := 0; i < n; i++ {
ctmp := c[i*ldc+i : i*ldc+n]
for j := range ctmp {
ctmp[j] *= beta
}
}
return
}
for i := 0; i < n; i++ {
ctmp := c[i*ldc : i*ldc+i+1]
for j := range ctmp {
ctmp[j] *= beta
}
}
return
}
if tA == blas.NoTrans {
if ul == blas.Upper {
for i := 0; i < n; i++ {
atmp := a[i*lda : i*lda+k]
btmp := b[i*ldb : i*ldb+k]
ctmp := c[i*ldc+i : i*ldc+n]
for jc := range ctmp {
j := i + jc
var tmp1, tmp2 float32
binner := b[j*ldb : j*ldb+k]
for l, v := range a[j*lda : j*lda+k] {
tmp1 += v * btmp[l]
tmp2 += atmp[l] * binner[l]
}
ctmp[jc] *= beta
ctmp[jc] += alpha * (tmp1 + tmp2)
}
}
return
}
for i := 0; i < n; i++ {
atmp := a[i*lda : i*lda+k]
btmp := b[i*ldb : i*ldb+k]
ctmp := c[i*ldc : i*ldc+i+1]
for j := 0; j <= i; j++ {
var tmp1, tmp2 float32
binner := b[j*ldb : j*ldb+k]
for l, v := range a[j*lda : j*lda+k] {
tmp1 += v * btmp[l]
tmp2 += atmp[l] * binner[l]
}
ctmp[j] *= beta
ctmp[j] += alpha * (tmp1 + tmp2)
}
}
return
}
if ul == blas.Upper {
for i := 0; i < n; i++ {
ctmp := c[i*ldc+i : i*ldc+n]
if beta != 1 {
for j := range ctmp {
ctmp[j] *= beta
}
}
for l := 0; l < k; l++ {
tmp1 := alpha * b[l*lda+i]
tmp2 := alpha * a[l*lda+i]
btmp := b[l*ldb+i : l*ldb+n]
if tmp1 != 0 || tmp2 != 0 {
for j, v := range a[l*lda+i : l*lda+n] {
ctmp[j] += v*tmp1 + btmp[j]*tmp2
}
}
}
}
return
}
for i := 0; i < n; i++ {
ctmp := c[i*ldc : i*ldc+i+1]
if beta != 1 {
for j := range ctmp {
ctmp[j] *= beta
}
}
for l := 0; l < k; l++ {
tmp1 := alpha * b[l*lda+i]
tmp2 := alpha * a[l*lda+i]
btmp := b[l*ldb : l*ldb+i+1]
if tmp1 != 0 || tmp2 != 0 {
for j, v := range a[l*lda : l*lda+i+1] {
ctmp[j] += v*tmp1 + btmp[j]*tmp2
}
}
}
}
}
// Strmm performs one of the matrix-matrix operations
// B = alpha * A * B if tA == blas.NoTrans and side == blas.Left
// B = alpha * A^T * B if tA == blas.Trans or blas.ConjTrans, and side == blas.Left
// B = alpha * B * A if tA == blas.NoTrans and side == blas.Right
// B = alpha * B * A^T if tA == blas.Trans or blas.ConjTrans, and side == blas.Right
// where A is an n×n or m×m triangular matrix, B is an m×n matrix, and alpha is a scalar.
//
// Float32 implementations are autogenerated and not directly tested.
func (Implementation) Strmm(s blas.Side, ul blas.Uplo, tA blas.Transpose, d blas.Diag, m, n int, alpha float32, a []float32, lda int, b []float32, ldb int) {
if s != blas.Left && s != blas.Right {
panic(badSide)
}
if ul != blas.Lower && ul != blas.Upper {
panic(badUplo)
}
if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans {
panic(badTranspose)
}
if d != blas.NonUnit && d != blas.Unit {
panic(badDiag)
}
if m < 0 {
panic(mLT0)
}
if n < 0 {
panic(nLT0)
}
var k int
if s == blas.Left {
k = m
} else {
k = n
}
if lda*(k-1)+k > len(a) || lda < max(1, k) {
panic(badLdA)
}
if ldb*(m-1)+n > len(b) || ldb < max(1, n) {
panic(badLdB)
}
if alpha == 0 {
for i := 0; i < m; i++ {
btmp := b[i*ldb : i*ldb+n]
for j := range btmp {
btmp[j] = 0
}
}
return
}
nonUnit := d == blas.NonUnit
if s == blas.Left {
if tA == blas.NoTrans {
if ul == blas.Upper {
for i := 0; i < m; i++ {
tmp := alpha
if nonUnit {
tmp *= a[i*lda+i]
}
btmp := b[i*ldb : i*ldb+n]
for j := range btmp {
btmp[j] *= tmp
}
for ka, va := range a[i*lda+i+1 : i*lda+m] {
k := ka + i + 1
tmp := alpha * va
if tmp != 0 {
f32.AxpyUnitaryTo(btmp, tmp, b[k*ldb:k*ldb+n], btmp)
}
}
}
return
}
for i := m - 1; i >= 0; i-- {
tmp := alpha
if nonUnit {
tmp *= a[i*lda+i]
}
btmp := b[i*ldb : i*ldb+n]
for j := range btmp {
btmp[j] *= tmp
}
for k, va := range a[i*lda : i*lda+i] {
tmp := alpha * va
if tmp != 0 {
f32.AxpyUnitaryTo(btmp, tmp, b[k*ldb:k*ldb+n], btmp)
}
}
}
return
}
// Cases where a is transposed.
if ul == blas.Upper {
for k := m - 1; k >= 0; k-- {
btmpk := b[k*ldb : k*ldb+n]
for ia, va := range a[k*lda+k+1 : k*lda+m] {
i := ia + k + 1
btmp := b[i*ldb : i*ldb+n]
tmp := alpha * va
if tmp != 0 {
f32.AxpyUnitaryTo(btmp, tmp, btmpk, btmp)
}
}
tmp := alpha
if nonUnit {
tmp *= a[k*lda+k]
}
if tmp != 1 {
for j := 0; j < n; j++ {
btmpk[j] *= tmp
}
}
}
return
}
for k := 0; k < m; k++ {
btmpk := b[k*ldb : k*ldb+n]
for i, va := range a[k*lda : k*lda+k] {
btmp := b[i*ldb : i*ldb+n]
tmp := alpha * va
if tmp != 0 {
f32.AxpyUnitaryTo(btmp, tmp, btmpk, btmp)
}
}
tmp := alpha
if nonUnit {
tmp *= a[k*lda+k]
}
if tmp != 1 {
for j := 0; j < n; j++ {
btmpk[j] *= tmp
}
}
}
return
}
// Cases where a is on the right
if tA == blas.NoTrans {
if ul == blas.Upper {
for i := 0; i < m; i++ {
btmp := b[i*ldb : i*ldb+n]
for k := n - 1; k >= 0; k-- {
tmp := alpha * btmp[k]
if tmp != 0 {
btmp[k] = tmp
if nonUnit {
btmp[k] *= a[k*lda+k]
}
for ja, v := range a[k*lda+k+1 : k*lda+n] {
j := ja + k + 1
btmp[j] += tmp * v
}
}
}
}
return
}
for i := 0; i < m; i++ {
btmp := b[i*ldb : i*ldb+n]
for k := 0; k < n; k++ {
tmp := alpha * btmp[k]
if tmp != 0 {
btmp[k] = tmp
if nonUnit {
btmp[k] *= a[k*lda+k]
}
f32.AxpyUnitaryTo(btmp, tmp, a[k*lda:k*lda+k], btmp)
}
}
}
return
}
// Cases where a is transposed.
if ul == blas.Upper {
for i := 0; i < m; i++ {
btmp := b[i*ldb : i*ldb+n]
for j, vb := range btmp {
tmp := vb
if nonUnit {
tmp *= a[j*lda+j]
}
tmp += f32.DotUnitary(a[j*lda+j+1:j*lda+n], btmp[j+1:n])
btmp[j] = alpha * tmp
}
}
return
}
for i := 0; i < m; i++ {
btmp := b[i*ldb : i*ldb+n]
for j := n - 1; j >= 0; j-- {
tmp := btmp[j]
if nonUnit {
tmp *= a[j*lda+j]
}
tmp += f32.DotUnitary(a[j*lda:j*lda+j], btmp[:j])
btmp[j] = alpha * tmp
}
}
}
@@ -0,0 +1,269 @@
// Code generated by "go generate gonum.org/v1/gonum/blas/gonum”; DO NOT EDIT.
// Copyright ©2014 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package gonum
import (
"runtime"
"sync"
"gonum.org/v1/gonum/blas"
"gonum.org/v1/gonum/internal/asm/f32"
)
// Sgemm performs one of the matrix-matrix operations
// C = alpha * A * B + beta * C
// C = alpha * A^T * B + beta * C
// C = alpha * A * B^T + beta * C
// C = alpha * A^T * B^T + beta * C
// where A is an m×k or k×m dense matrix, B is an n×k or k×n dense matrix, C is
// an m×n matrix, and alpha and beta are scalars. tA and tB specify whether A or
// B are transposed.
//
// Float32 implementations are autogenerated and not directly tested.
func (Implementation) Sgemm(tA, tB blas.Transpose, m, n, k int, alpha float32, a []float32, lda int, b []float32, ldb int, beta float32, c []float32, ldc int) {
if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans {
panic(badTranspose)
}
if tB != blas.NoTrans && tB != blas.Trans && tB != blas.ConjTrans {
panic(badTranspose)
}
aTrans := tA == blas.Trans || tA == blas.ConjTrans
if aTrans {
checkSMatrix('a', k, m, a, lda)
} else {
checkSMatrix('a', m, k, a, lda)
}
bTrans := tB == blas.Trans || tB == blas.ConjTrans
if bTrans {
checkSMatrix('b', n, k, b, ldb)
} else {
checkSMatrix('b', k, n, b, ldb)
}
checkSMatrix('c', m, n, c, ldc)
// scale c
if beta != 1 {
if beta == 0 {
for i := 0; i < m; i++ {
ctmp := c[i*ldc : i*ldc+n]
for j := range ctmp {
ctmp[j] = 0
}
}
} else {
for i := 0; i < m; i++ {
ctmp := c[i*ldc : i*ldc+n]
for j := range ctmp {
ctmp[j] *= beta
}
}
}
}
sgemmParallel(aTrans, bTrans, m, n, k, a, lda, b, ldb, c, ldc, alpha)
}
func sgemmParallel(aTrans, bTrans bool, m, n, k int, a []float32, lda int, b []float32, ldb int, c []float32, ldc int, alpha float32) {
// dgemmParallel computes a parallel matrix multiplication by partitioning
// a and b into sub-blocks, and updating c with the multiplication of the sub-block
// In all cases,
// A = [ A_11 A_12 ... A_1j
// A_21 A_22 ... A_2j
// ...
// A_i1 A_i2 ... A_ij]
//
// and same for B. All of the submatrix sizes are blockSize×blockSize except
// at the edges.
//
// In all cases, there is one dimension for each matrix along which
// C must be updated sequentially.
// Cij = \sum_k Aik Bki, (A * B)
// Cij = \sum_k Aki Bkj, (A^T * B)
// Cij = \sum_k Aik Bjk, (A * B^T)
// Cij = \sum_k Aki Bjk, (A^T * B^T)
//
// This code computes one {i, j} block sequentially along the k dimension,
// and computes all of the {i, j} blocks concurrently. This
// partitioning allows Cij to be updated in-place without race-conditions.
// Instead of launching a goroutine for each possible concurrent computation,
// a number of worker goroutines are created and channels are used to pass
// available and completed cases.
//
// http://alexkr.com/docs/matrixmult.pdf is a good reference on matrix-matrix
// multiplies, though this code does not copy matrices to attempt to eliminate
// cache misses.
maxKLen := k
parBlocks := blocks(m, blockSize) * blocks(n, blockSize)
if parBlocks < minParBlock {
// The matrix multiplication is small in the dimensions where it can be
// computed concurrently. Just do it in serial.
sgemmSerial(aTrans, bTrans, m, n, k, a, lda, b, ldb, c, ldc, alpha)
return
}
nWorkers := runtime.GOMAXPROCS(0)
if parBlocks < nWorkers {
nWorkers = parBlocks
}
// There is a tradeoff between the workers having to wait for work
// and a large buffer making operations slow.
buf := buffMul * nWorkers
if buf > parBlocks {
buf = parBlocks
}
sendChan := make(chan subMul, buf)
// Launch workers. A worker receives an {i, j} submatrix of c, and computes
// A_ik B_ki (or the transposed version) storing the result in c_ij. When the
// channel is finally closed, it signals to the waitgroup that it has finished
// computing.
var wg sync.WaitGroup
for i := 0; i < nWorkers; i++ {
wg.Add(1)
go func() {
defer wg.Done()
// Make local copies of otherwise global variables to reduce shared memory.
// This has a noticeable effect on benchmarks in some cases.
alpha := alpha
aTrans := aTrans
bTrans := bTrans
m := m
n := n
for sub := range sendChan {
i := sub.i
j := sub.j
leni := blockSize
if i+leni > m {
leni = m - i
}
lenj := blockSize
if j+lenj > n {
lenj = n - j
}
cSub := sliceView32(c, ldc, i, j, leni, lenj)
// Compute A_ik B_kj for all k
for k := 0; k < maxKLen; k += blockSize {
lenk := blockSize
if k+lenk > maxKLen {
lenk = maxKLen - k
}
var aSub, bSub []float32
if aTrans {
aSub = sliceView32(a, lda, k, i, lenk, leni)
} else {
aSub = sliceView32(a, lda, i, k, leni, lenk)
}
if bTrans {
bSub = sliceView32(b, ldb, j, k, lenj, lenk)
} else {
bSub = sliceView32(b, ldb, k, j, lenk, lenj)
}
sgemmSerial(aTrans, bTrans, leni, lenj, lenk, aSub, lda, bSub, ldb, cSub, ldc, alpha)
}
}
}()
}
// Send out all of the {i, j} subblocks for computation.
for i := 0; i < m; i += blockSize {
for j := 0; j < n; j += blockSize {
sendChan <- subMul{
i: i,
j: j,
}
}
}
close(sendChan)
wg.Wait()
}
// sgemmSerial is serial matrix multiply
func sgemmSerial(aTrans, bTrans bool, m, n, k int, a []float32, lda int, b []float32, ldb int, c []float32, ldc int, alpha float32) {
switch {
case !aTrans && !bTrans:
sgemmSerialNotNot(m, n, k, a, lda, b, ldb, c, ldc, alpha)
return
case aTrans && !bTrans:
sgemmSerialTransNot(m, n, k, a, lda, b, ldb, c, ldc, alpha)
return
case !aTrans && bTrans:
sgemmSerialNotTrans(m, n, k, a, lda, b, ldb, c, ldc, alpha)
return
case aTrans && bTrans:
sgemmSerialTransTrans(m, n, k, a, lda, b, ldb, c, ldc, alpha)
return
default:
panic("unreachable")
}
}
// sgemmSerial where neither a nor b are transposed
func sgemmSerialNotNot(m, n, k int, a []float32, lda int, b []float32, ldb int, c []float32, ldc int, alpha float32) {
// This style is used instead of the literal [i*stride +j]) is used because
// approximately 5 times faster as of go 1.3.
for i := 0; i < m; i++ {
ctmp := c[i*ldc : i*ldc+n]
for l, v := range a[i*lda : i*lda+k] {
tmp := alpha * v
if tmp != 0 {
f32.AxpyUnitaryTo(ctmp, tmp, b[l*ldb:l*ldb+n], ctmp)
}
}
}
}
// sgemmSerial where neither a is transposed and b is not
func sgemmSerialTransNot(m, n, k int, a []float32, lda int, b []float32, ldb int, c []float32, ldc int, alpha float32) {
// This style is used instead of the literal [i*stride +j]) is used because
// approximately 5 times faster as of go 1.3.
for l := 0; l < k; l++ {
btmp := b[l*ldb : l*ldb+n]
for i, v := range a[l*lda : l*lda+m] {
tmp := alpha * v
if tmp != 0 {
ctmp := c[i*ldc : i*ldc+n]
f32.AxpyUnitaryTo(ctmp, tmp, btmp, ctmp)
}
}
}
}
// sgemmSerial where neither a is not transposed and b is
func sgemmSerialNotTrans(m, n, k int, a []float32, lda int, b []float32, ldb int, c []float32, ldc int, alpha float32) {
// This style is used instead of the literal [i*stride +j]) is used because
// approximately 5 times faster as of go 1.3.
for i := 0; i < m; i++ {
atmp := a[i*lda : i*lda+k]
ctmp := c[i*ldc : i*ldc+n]
for j := 0; j < n; j++ {
ctmp[j] += alpha * f32.DotUnitary(atmp, b[j*ldb:j*ldb+k])
}
}
}
// sgemmSerial where both are transposed
func sgemmSerialTransTrans(m, n, k int, a []float32, lda int, b []float32, ldb int, c []float32, ldc int, alpha float32) {
// This style is used instead of the literal [i*stride +j]) is used because
// approximately 5 times faster as of go 1.3.
for l := 0; l < k; l++ {
for i, v := range a[l*lda : l*lda+m] {
tmp := alpha * v
if tmp != 0 {
ctmp := c[i*ldc : i*ldc+n]
f32.AxpyInc(tmp, b[l:], ctmp, uintptr(n), uintptr(ldb), 1, 0, 0)
}
}
}
}
func sliceView32(a []float32, lda, i, j, r, c int) []float32 {
return a[i*lda+j : (i+r-1)*lda+j+c]
}
+145
View File
@@ -0,0 +1,145 @@
#!/usr/bin/env bash
# Copyright ©2015 The Gonum Authors. All rights reserved.
# Use of this source code is governed by a BSD-style
# license that can be found in the LICENSE file.
WARNING='//\
// Float32 implementations are autogenerated and not directly tested.\
'
# Level1 routines.
echo Generating level1single.go
echo -e '// Code generated by "go generate gonum.org/v1/gonum/blas/gonum”; DO NOT EDIT.\n' > level1single.go
cat level1double.go \
| gofmt -r 'blas.Float64Level1 -> blas.Float32Level1' \
\
| gofmt -r 'float64 -> float32' \
| gofmt -r 'blas.DrotmParams -> blas.SrotmParams' \
\
| gofmt -r 'f64.AxpyInc -> f32.AxpyInc' \
| gofmt -r 'f64.AxpyIncTo -> f32.AxpyIncTo' \
| gofmt -r 'f64.AxpyUnitary -> f32.AxpyUnitary' \
| gofmt -r 'f64.AxpyUnitaryTo -> f32.AxpyUnitaryTo' \
| gofmt -r 'f64.DotUnitary -> f32.DotUnitary' \
| gofmt -r 'f64.ScalInc -> f32.ScalInc' \
| gofmt -r 'f64.ScalUnitary -> f32.ScalUnitary' \
\
| sed -e "s_^\(func (Implementation) \)D\(.*\)\$_$WARNING\1S\2_" \
-e 's_^// D_// S_' \
-e "s_^\(func (Implementation) \)Id\(.*\)\$_$WARNING\1Is\2_" \
-e 's_^// Id_// Is_' \
-e 's_"gonum.org/v1/gonum/internal/asm/f64"_"gonum.org/v1/gonum/internal/asm/f32"_' \
-e 's_"math"_math "gonum.org/v1/gonum/internal/math32"_' \
>> level1single.go
echo Generating level1single_sdot.go
echo -e '// Code generated by "go generate gonum.org/v1/gonum/blas/gonum”; DO NOT EDIT.\n' > level1single_sdot.go
cat level1double_ddot.go \
| gofmt -r 'float64 -> float32' \
\
| gofmt -r 'f64.DotInc -> f32.DotInc' \
| gofmt -r 'f64.DotUnitary -> f32.DotUnitary' \
\
| sed -e "s_^\(func (Implementation) \)D\(.*\)\$_$WARNING\1S\2_" \
-e 's_^// D_// S_' \
-e 's_"gonum.org/v1/gonum/internal/asm/f64"_"gonum.org/v1/gonum/internal/asm/f32"_' \
>> level1single_sdot.go
echo Generating level1single_dsdot.go
echo -e '// Code generated by "go generate gonum.org/v1/gonum/blas/gonum”; DO NOT EDIT.\n' > level1single_dsdot.go
cat level1double_ddot.go \
| gofmt -r '[]float64 -> []float32' \
\
| gofmt -r 'f64.DotInc -> f32.DdotInc' \
| gofmt -r 'f64.DotUnitary -> f32.DdotUnitary' \
\
| sed -e "s_^\(func (Implementation) \)D\(.*\)\$_$WARNING\1Ds\2_" \
-e 's_^// D_// Ds_' \
-e 's_"gonum.org/v1/gonum/internal/asm/f64"_"gonum.org/v1/gonum/internal/asm/f32"_' \
>> level1single_dsdot.go
echo Generating level1single_sdsdot.go
echo -e '// Code generated by "go generate gonum.org/v1/gonum/blas/gonum”; DO NOT EDIT.\n' > level1single_sdsdot.go
cat level1double_ddot.go \
| gofmt -r 'float64 -> float32' \
\
| gofmt -r 'f64.DotInc(x, y, f(n), f(incX), f(incY), f(ix), f(iy)) -> alpha + float32(f32.DdotInc(x, y, f(n), f(incX), f(incY), f(ix), f(iy)))' \
| gofmt -r 'f64.DotUnitary(a, b) -> alpha + float32(f32.DdotUnitary(a, b))' \
\
| sed -e "s_^\(func (Implementation) \)D\(.*\)\$_$WARNING\1Sds\2_" \
-e 's_^// D\(.*\)$_// Sds\1 plus a constant_' \
-e 's_\\sum_alpha + \\sum_' \
-e 's/n int/n int, alpha float32/' \
-e 's_"gonum.org/v1/gonum/internal/asm/f64"_"gonum.org/v1/gonum/internal/asm/f32"_' \
>> level1single_sdsdot.go
# Level2 routines.
echo Generating level2single.go
echo -e '// Code generated by "go generate gonum.org/v1/gonum/blas/gonum”; DO NOT EDIT.\n' > level2single.go
cat level2double.go \
| gofmt -r 'blas.Float64Level2 -> blas.Float32Level2' \
\
| gofmt -r 'float64 -> float32' \
\
| gofmt -r 'Dscal -> Sscal' \
\
| gofmt -r 'f64.AxpyInc -> f32.AxpyInc' \
| gofmt -r 'f64.AxpyIncTo -> f32.AxpyIncTo' \
| gofmt -r 'f64.AxpyUnitary -> f32.AxpyUnitary' \
| gofmt -r 'f64.AxpyUnitaryTo -> f32.AxpyUnitaryTo' \
| gofmt -r 'f64.DotInc -> f32.DotInc' \
| gofmt -r 'f64.DotUnitary -> f32.DotUnitary' \
| gofmt -r 'f64.Ger -> f32.Ger' \
\
| sed -e "s_^\(func (Implementation) \)D\(.*\)\$_$WARNING\1S\2_" \
-e 's_^// D_// S_' \
-e 's_"gonum.org/v1/gonum/internal/asm/f64"_"gonum.org/v1/gonum/internal/asm/f32"_' \
>> level2single.go
# Level3 routines.
echo Generating level3single.go
echo -e '// Code generated by "go generate gonum.org/v1/gonum/blas/gonum”; DO NOT EDIT.\n' > level3single.go
cat level3double.go \
| gofmt -r 'blas.Float64Level3 -> blas.Float32Level3' \
\
| gofmt -r 'float64 -> float32' \
\
| gofmt -r 'f64.AxpyUnitaryTo -> f32.AxpyUnitaryTo' \
| gofmt -r 'f64.DotUnitary -> f32.DotUnitary' \
\
| sed -e "s_^\(func (Implementation) \)D\(.*\)\$_$WARNING\1S\2_" \
-e 's_^// D_// S_' \
-e 's_"gonum.org/v1/gonum/internal/asm/f64"_"gonum.org/v1/gonum/internal/asm/f32"_' \
>> level3single.go
echo Generating sgemm.go
echo -e '// Code generated by "go generate gonum.org/v1/gonum/blas/gonum”; DO NOT EDIT.\n' > sgemm.go
cat dgemm.go \
| gofmt -r 'float64 -> float32' \
| gofmt -r 'sliceView64 -> sliceView32' \
| gofmt -r 'checkDMatrix -> checkSMatrix' \
\
| gofmt -r 'dgemmParallel -> sgemmParallel' \
| gofmt -r 'computeNumBlocks64 -> computeNumBlocks32' \
| gofmt -r 'dgemmSerial -> sgemmSerial' \
| gofmt -r 'dgemmSerialNotNot -> sgemmSerialNotNot' \
| gofmt -r 'dgemmSerialTransNot -> sgemmSerialTransNot' \
| gofmt -r 'dgemmSerialNotTrans -> sgemmSerialNotTrans' \
| gofmt -r 'dgemmSerialTransTrans -> sgemmSerialTransTrans' \
\
| gofmt -r 'f64.AxpyInc -> f32.AxpyInc' \
| gofmt -r 'f64.AxpyIncTo -> f32.AxpyIncTo' \
| gofmt -r 'f64.AxpyUnitaryTo -> f32.AxpyUnitaryTo' \
| gofmt -r 'f64.DotUnitary -> f32.DotUnitary' \
\
| sed -e "s_^\(func (Implementation) \)D\(.*\)\$_$WARNING\1S\2_" \
-e 's_^// D_// S_' \
-e 's_^// d_// s_' \
-e 's_"gonum.org/v1/gonum/internal/asm/f64"_"gonum.org/v1/gonum/internal/asm/f32"_' \
>> sgemm.go
@@ -0,0 +1,4 @@
# Gonum floats [![GoDoc](https://godoc.org/gonum.org/v1/gonum/floats?status.svg)](https://godoc.org/gonum.org/v1/gonum/floats)
Package floats provides a set of helper routines for dealing with slices of float64.
The functions avoid allocations to allow for use within tight loops without garbage collection overhead.
+11
View File
@@ -0,0 +1,11 @@
// Copyright ©2017 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package floats provides a set of helper routines for dealing with slices
// of float64. The functions avoid allocations to allow for use within tight
// loops without garbage collection overhead.
//
// The convention used is that when a slice is being modified in place, it has
// the name dst.
package floats
+928
View File
@@ -0,0 +1,928 @@
// Copyright 2013 The Gonum Authors. All rights reserved.
// Use of this code is governed by a BSD-style
// license that can be found in the LICENSE file
package floats
import (
"errors"
"math"
"sort"
"strconv"
"gonum.org/v1/gonum/internal/asm/f64"
)
// Add adds, element-wise, the elements of s and dst, and stores in dst.
// Panics if the lengths of dst and s do not match.
func Add(dst, s []float64) {
if len(dst) != len(s) {
panic("floats: length of the slices do not match")
}
f64.AxpyUnitaryTo(dst, 1, s, dst)
}
// AddTo adds, element-wise, the elements of s and t and
// stores the result in dst. Panics if the lengths of s, t and dst do not match.
func AddTo(dst, s, t []float64) []float64 {
if len(s) != len(t) {
panic("floats: length of adders do not match")
}
if len(dst) != len(s) {
panic("floats: length of destination does not match length of adder")
}
f64.AxpyUnitaryTo(dst, 1, s, t)
return dst
}
// AddConst adds the scalar c to all of the values in dst.
func AddConst(c float64, dst []float64) {
for i := range dst {
dst[i] += c
}
}
// AddScaled performs dst = dst + alpha * s.
// It panics if the lengths of dst and s are not equal.
func AddScaled(dst []float64, alpha float64, s []float64) {
if len(dst) != len(s) {
panic("floats: length of destination and source to not match")
}
f64.AxpyUnitaryTo(dst, alpha, s, dst)
}
// AddScaledTo performs dst = y + alpha * s, where alpha is a scalar,
// and dst, y and s are all slices.
// It panics if the lengths of dst, y, and s are not equal.
//
// At the return of the function, dst[i] = y[i] + alpha * s[i]
func AddScaledTo(dst, y []float64, alpha float64, s []float64) []float64 {
if len(dst) != len(s) || len(dst) != len(y) {
panic("floats: lengths of slices do not match")
}
f64.AxpyUnitaryTo(dst, alpha, s, y)
return dst
}
// argsort is a helper that implements sort.Interface, as used by
// Argsort.
type argsort struct {
s []float64
inds []int
}
func (a argsort) Len() int {
return len(a.s)
}
func (a argsort) Less(i, j int) bool {
return a.s[i] < a.s[j]
}
func (a argsort) Swap(i, j int) {
a.s[i], a.s[j] = a.s[j], a.s[i]
a.inds[i], a.inds[j] = a.inds[j], a.inds[i]
}
// Argsort sorts the elements of dst while tracking their original order.
// At the conclusion of Argsort, dst will contain the original elements of dst
// but sorted in increasing order, and inds will contain the original position
// of the elements in the slice such that dst[i] = origDst[inds[i]].
// It panics if the lengths of dst and inds do not match.
func Argsort(dst []float64, inds []int) {
if len(dst) != len(inds) {
panic("floats: length of inds does not match length of slice")
}
for i := range dst {
inds[i] = i
}
a := argsort{s: dst, inds: inds}
sort.Sort(a)
}
// Count applies the function f to every element of s and returns the number
// of times the function returned true.
func Count(f func(float64) bool, s []float64) int {
var n int
for _, val := range s {
if f(val) {
n++
}
}
return n
}
// CumProd finds the cumulative product of the first i elements in
// s and puts them in place into the ith element of the
// destination dst. A panic will occur if the lengths of arguments
// do not match.
//
// At the return of the function, dst[i] = s[i] * s[i-1] * s[i-2] * ...
func CumProd(dst, s []float64) []float64 {
if len(dst) != len(s) {
panic("floats: length of destination does not match length of the source")
}
if len(dst) == 0 {
return dst
}
return f64.CumProd(dst, s)
}
// CumSum finds the cumulative sum of the first i elements in
// s and puts them in place into the ith element of the
// destination dst. A panic will occur if the lengths of arguments
// do not match.
//
// At the return of the function, dst[i] = s[i] + s[i-1] + s[i-2] + ...
func CumSum(dst, s []float64) []float64 {
if len(dst) != len(s) {
panic("floats: length of destination does not match length of the source")
}
if len(dst) == 0 {
return dst
}
return f64.CumSum(dst, s)
}
// Distance computes the L-norm of s - t. See Norm for special cases.
// A panic will occur if the lengths of s and t do not match.
func Distance(s, t []float64, L float64) float64 {
if len(s) != len(t) {
panic("floats: slice lengths do not match")
}
if len(s) == 0 {
return 0
}
var norm float64
if L == 2 {
for i, v := range s {
diff := t[i] - v
norm = math.Hypot(norm, diff)
}
return norm
}
if L == 1 {
for i, v := range s {
norm += math.Abs(t[i] - v)
}
return norm
}
if math.IsInf(L, 1) {
for i, v := range s {
absDiff := math.Abs(t[i] - v)
if absDiff > norm {
norm = absDiff
}
}
return norm
}
for i, v := range s {
norm += math.Pow(math.Abs(t[i]-v), L)
}
return math.Pow(norm, 1/L)
}
// Div performs element-wise division dst / s
// and stores the value in dst. It panics if the
// lengths of s and t are not equal.
func Div(dst, s []float64) {
if len(dst) != len(s) {
panic("floats: slice lengths do not match")
}
f64.Div(dst, s)
}
// DivTo performs element-wise division s / t
// and stores the value in dst. It panics if the
// lengths of s, t, and dst are not equal.
func DivTo(dst, s, t []float64) []float64 {
if len(s) != len(t) || len(dst) != len(t) {
panic("floats: slice lengths do not match")
}
return f64.DivTo(dst, s, t)
}
// Dot computes the dot product of s1 and s2, i.e.
// sum_{i = 1}^N s1[i]*s2[i].
// A panic will occur if lengths of arguments do not match.
func Dot(s1, s2 []float64) float64 {
if len(s1) != len(s2) {
panic("floats: lengths of the slices do not match")
}
return f64.DotUnitary(s1, s2)
}
// Equal returns true if the slices have equal lengths and
// all elements are numerically identical.
func Equal(s1, s2 []float64) bool {
if len(s1) != len(s2) {
return false
}
for i, val := range s1 {
if s2[i] != val {
return false
}
}
return true
}
// EqualApprox returns true if the slices have equal lengths and
// all element pairs have an absolute tolerance less than tol or a
// relative tolerance less than tol.
func EqualApprox(s1, s2 []float64, tol float64) bool {
if len(s1) != len(s2) {
return false
}
for i, a := range s1 {
if !EqualWithinAbsOrRel(a, s2[i], tol, tol) {
return false
}
}
return true
}
// EqualFunc returns true if the slices have the same lengths
// and the function returns true for all element pairs.
func EqualFunc(s1, s2 []float64, f func(float64, float64) bool) bool {
if len(s1) != len(s2) {
return false
}
for i, val := range s1 {
if !f(val, s2[i]) {
return false
}
}
return true
}
// EqualWithinAbs returns true if a and b have an absolute
// difference of less than tol.
func EqualWithinAbs(a, b, tol float64) bool {
return a == b || math.Abs(a-b) <= tol
}
const minNormalFloat64 = 2.2250738585072014e-308
// EqualWithinRel returns true if the difference between a and b
// is not greater than tol times the greater value.
func EqualWithinRel(a, b, tol float64) bool {
if a == b {
return true
}
delta := math.Abs(a - b)
if delta <= minNormalFloat64 {
return delta <= tol*minNormalFloat64
}
// We depend on the division in this relationship to identify
// infinities (we rely on the NaN to fail the test) otherwise
// we compare Infs of the same sign and evaluate Infs as equal
// independent of sign.
return delta/math.Max(math.Abs(a), math.Abs(b)) <= tol
}
// EqualWithinAbsOrRel returns true if a and b are equal to within
// the absolute tolerance.
func EqualWithinAbsOrRel(a, b, absTol, relTol float64) bool {
if EqualWithinAbs(a, b, absTol) {
return true
}
return EqualWithinRel(a, b, relTol)
}
// EqualWithinULP returns true if a and b are equal to within
// the specified number of floating point units in the last place.
func EqualWithinULP(a, b float64, ulp uint) bool {
if a == b {
return true
}
if math.IsNaN(a) || math.IsNaN(b) {
return false
}
if math.Signbit(a) != math.Signbit(b) {
return math.Float64bits(math.Abs(a))+math.Float64bits(math.Abs(b)) <= uint64(ulp)
}
return ulpDiff(math.Float64bits(a), math.Float64bits(b)) <= uint64(ulp)
}
func ulpDiff(a, b uint64) uint64 {
if a > b {
return a - b
}
return b - a
}
// EqualLengths returns true if all of the slices have equal length,
// and false otherwise. Returns true if there are no input slices.
func EqualLengths(slices ...[]float64) bool {
// This length check is needed: http://play.golang.org/p/sdty6YiLhM
if len(slices) == 0 {
return true
}
l := len(slices[0])
for i := 1; i < len(slices); i++ {
if len(slices[i]) != l {
return false
}
}
return true
}
// Find applies f to every element of s and returns the indices of the first
// k elements for which the f returns true, or all such elements
// if k < 0.
// Find will reslice inds to have 0 length, and will append
// found indices to inds.
// If k > 0 and there are fewer than k elements in s satisfying f,
// all of the found elements will be returned along with an error.
// At the return of the function, the input inds will be in an undetermined state.
func Find(inds []int, f func(float64) bool, s []float64, k int) ([]int, error) {
// inds is also returned to allow for calling with nil
// Reslice inds to have zero length
inds = inds[:0]
// If zero elements requested, can just return
if k == 0 {
return inds, nil
}
// If k < 0, return all of the found indices
if k < 0 {
for i, val := range s {
if f(val) {
inds = append(inds, i)
}
}
return inds, nil
}
// Otherwise, find the first k elements
nFound := 0
for i, val := range s {
if f(val) {
inds = append(inds, i)
nFound++
if nFound == k {
return inds, nil
}
}
}
// Finished iterating over the loop, which means k elements were not found
return inds, errors.New("floats: insufficient elements found")
}
// HasNaN returns true if the slice s has any values that are NaN and false
// otherwise.
func HasNaN(s []float64) bool {
for _, v := range s {
if math.IsNaN(v) {
return true
}
}
return false
}
// LogSpan returns a set of n equally spaced points in log space between,
// l and u where N is equal to len(dst). The first element of the
// resulting dst will be l and the final element of dst will be u.
// Panics if len(dst) < 2
// Note that this call will return NaNs if either l or u are negative, and
// will return all zeros if l or u is zero.
// Also returns the mutated slice dst, so that it can be used in range, like:
//
// for i, x := range LogSpan(dst, l, u) { ... }
func LogSpan(dst []float64, l, u float64) []float64 {
Span(dst, math.Log(l), math.Log(u))
for i := range dst {
dst[i] = math.Exp(dst[i])
}
return dst
}
// LogSumExp returns the log of the sum of the exponentials of the values in s.
// Panics if s is an empty slice.
func LogSumExp(s []float64) float64 {
// Want to do this in a numerically stable way which avoids
// overflow and underflow
// First, find the maximum value in the slice.
maxval := Max(s)
if math.IsInf(maxval, 0) {
// If it's infinity either way, the logsumexp will be infinity as well
// returning now avoids NaNs
return maxval
}
var lse float64
// Compute the sumexp part
for _, val := range s {
lse += math.Exp(val - maxval)
}
// Take the log and add back on the constant taken out
return math.Log(lse) + maxval
}
// Max returns the maximum value in the input slice. If the slice is empty, Max will panic.
func Max(s []float64) float64 {
return s[MaxIdx(s)]
}
// MaxIdx returns the index of the maximum value in the input slice. If several
// entries have the maximum value, the first such index is returned. If the slice
// is empty, MaxIdx will panic.
func MaxIdx(s []float64) int {
if len(s) == 0 {
panic("floats: zero slice length")
}
max := math.NaN()
var ind int
for i, v := range s {
if math.IsNaN(v) {
continue
}
if v > max || math.IsNaN(max) {
max = v
ind = i
}
}
return ind
}
// Min returns the maximum value in the input slice. If the slice is empty, Min will panic.
func Min(s []float64) float64 {
return s[MinIdx(s)]
}
// MinIdx returns the index of the minimum value in the input slice. If several
// entries have the maximum value, the first such index is returned. If the slice
// is empty, MinIdx will panic.
func MinIdx(s []float64) int {
if len(s) == 0 {
panic("floats: zero slice length")
}
min := math.NaN()
var ind int
for i, v := range s {
if math.IsNaN(v) {
continue
}
if v < min || math.IsNaN(min) {
min = v
ind = i
}
}
return ind
}
// Mul performs element-wise multiplication between dst
// and s and stores the value in dst. Panics if the
// lengths of s and t are not equal.
func Mul(dst, s []float64) {
if len(dst) != len(s) {
panic("floats: slice lengths do not match")
}
for i, val := range s {
dst[i] *= val
}
}
// MulTo performs element-wise multiplication between s
// and t and stores the value in dst. Panics if the
// lengths of s, t, and dst are not equal.
func MulTo(dst, s, t []float64) []float64 {
if len(s) != len(t) || len(dst) != len(t) {
panic("floats: slice lengths do not match")
}
for i, val := range t {
dst[i] = val * s[i]
}
return dst
}
const (
nanBits = 0x7ff8000000000000
nanMask = 0xfff8000000000000
)
// NaNWith returns an IEEE 754 "quiet not-a-number" value with the
// payload specified in the low 51 bits of payload.
// The NaN returned by math.NaN has a bit pattern equal to NaNWith(1).
func NaNWith(payload uint64) float64 {
return math.Float64frombits(nanBits | (payload &^ nanMask))
}
// NaNPayload returns the lowest 51 bits payload of an IEEE 754 "quiet
// not-a-number". For values of f other than quiet-NaN, NaNPayload
// returns zero and false.
func NaNPayload(f float64) (payload uint64, ok bool) {
b := math.Float64bits(f)
if b&nanBits != nanBits {
return 0, false
}
return b &^ nanMask, true
}
// NearestIdx returns the index of the element in s
// whose value is nearest to v. If several such
// elements exist, the lowest index is returned.
// NearestIdx panics if len(s) == 0.
func NearestIdx(s []float64, v float64) int {
if len(s) == 0 {
panic("floats: zero length slice")
}
switch {
case math.IsNaN(v):
return 0
case math.IsInf(v, 1):
return MaxIdx(s)
case math.IsInf(v, -1):
return MinIdx(s)
}
var ind int
dist := math.NaN()
for i, val := range s {
newDist := math.Abs(v - val)
// A NaN distance will not be closer.
if math.IsNaN(newDist) {
continue
}
if newDist < dist || math.IsNaN(dist) {
dist = newDist
ind = i
}
}
return ind
}
// NearestIdxForSpan return the index of a hypothetical vector created
// by Span with length n and bounds l and u whose value is closest
// to v. That is, NearestIdxForSpan(n, l, u, v) is equivalent to
// Nearest(Span(make([]float64, n),l,u),v) without an allocation.
// NearestIdxForSpan panics if n is less than two.
func NearestIdxForSpan(n int, l, u float64, v float64) int {
if n <= 1 {
panic("floats: span must have length >1")
}
if math.IsNaN(v) {
return 0
}
// Special cases for Inf and NaN.
switch {
case math.IsNaN(l) && !math.IsNaN(u):
return n - 1
case math.IsNaN(u):
return 0
case math.IsInf(l, 0) && math.IsInf(u, 0):
if l == u {
return 0
}
if n%2 == 1 {
if !math.IsInf(v, 0) {
return n / 2
}
if math.Copysign(1, v) == math.Copysign(1, l) {
return 0
}
return n/2 + 1
}
if math.Copysign(1, v) == math.Copysign(1, l) {
return 0
}
return n / 2
case math.IsInf(l, 0):
if v == l {
return 0
}
return n - 1
case math.IsInf(u, 0):
if v == u {
return n - 1
}
return 0
case math.IsInf(v, -1):
if l <= u {
return 0
}
return n - 1
case math.IsInf(v, 1):
if u <= l {
return 0
}
return n - 1
}
// Special cases for v outside (l, u) and (u, l).
switch {
case l < u:
if v <= l {
return 0
}
if v >= u {
return n - 1
}
case l > u:
if v >= l {
return 0
}
if v <= u {
return n - 1
}
default:
return 0
}
// Can't guarantee anything about exactly halfway between
// because of floating point weirdness.
return int((float64(n)-1)/(u-l)*(v-l) + 0.5)
}
// Norm returns the L norm of the slice S, defined as
// (sum_{i=1}^N s[i]^L)^{1/L}
// Special cases:
// L = math.Inf(1) gives the maximum absolute value.
// Does not correctly compute the zero norm (use Count).
func Norm(s []float64, L float64) float64 {
// Should this complain if L is not positive?
// Should this be done in log space for better numerical stability?
// would be more cost
// maybe only if L is high?
if len(s) == 0 {
return 0
}
if L == 2 {
twoNorm := math.Abs(s[0])
for i := 1; i < len(s); i++ {
twoNorm = math.Hypot(twoNorm, s[i])
}
return twoNorm
}
var norm float64
if L == 1 {
for _, val := range s {
norm += math.Abs(val)
}
return norm
}
if math.IsInf(L, 1) {
for _, val := range s {
norm = math.Max(norm, math.Abs(val))
}
return norm
}
for _, val := range s {
norm += math.Pow(math.Abs(val), L)
}
return math.Pow(norm, 1/L)
}
// ParseWithNA converts the string s to a float64 in v.
// If s equals missing, w is returned as 0, otherwise 1.
func ParseWithNA(s, missing string) (v, w float64, err error) {
if s == missing {
return 0, 0, nil
}
v, err = strconv.ParseFloat(s, 64)
if err == nil {
w = 1
}
return v, w, err
}
// Prod returns the product of the elements of the slice.
// Returns 1 if len(s) = 0.
func Prod(s []float64) float64 {
prod := 1.0
for _, val := range s {
prod *= val
}
return prod
}
// Reverse reverses the order of elements in the slice.
func Reverse(s []float64) {
for i, j := 0, len(s)-1; i < j; i, j = i+1, j-1 {
s[i], s[j] = s[j], s[i]
}
}
// Round returns the half away from zero rounded value of x with prec precision.
//
// Special cases are:
// Round(±0) = +0
// Round(±Inf) = ±Inf
// Round(NaN) = NaN
func Round(x float64, prec int) float64 {
if x == 0 {
// Make sure zero is returned
// without the negative bit set.
return 0
}
// Fast path for positive precision on integers.
if prec >= 0 && x == math.Trunc(x) {
return x
}
pow := math.Pow10(prec)
intermed := x * pow
if math.IsInf(intermed, 0) {
return x
}
if x < 0 {
x = math.Ceil(intermed - 0.5)
} else {
x = math.Floor(intermed + 0.5)
}
if x == 0 {
return 0
}
return x / pow
}
// RoundEven returns the half even rounded value of x with prec precision.
//
// Special cases are:
// RoundEven(±0) = +0
// RoundEven(±Inf) = ±Inf
// RoundEven(NaN) = NaN
func RoundEven(x float64, prec int) float64 {
if x == 0 {
// Make sure zero is returned
// without the negative bit set.
return 0
}
// Fast path for positive precision on integers.
if prec >= 0 && x == math.Trunc(x) {
return x
}
pow := math.Pow10(prec)
intermed := x * pow
if math.IsInf(intermed, 0) {
return x
}
if isHalfway(intermed) {
correction, _ := math.Modf(math.Mod(intermed, 2))
intermed += correction
if intermed > 0 {
x = math.Floor(intermed)
} else {
x = math.Ceil(intermed)
}
} else {
if x < 0 {
x = math.Ceil(intermed - 0.5)
} else {
x = math.Floor(intermed + 0.5)
}
}
if x == 0 {
return 0
}
return x / pow
}
func isHalfway(x float64) bool {
_, frac := math.Modf(x)
frac = math.Abs(frac)
return frac == 0.5 || (math.Nextafter(frac, math.Inf(-1)) < 0.5 && math.Nextafter(frac, math.Inf(1)) > 0.5)
}
// Same returns true if the input slices have the same length and the all elements
// have the same value with NaN treated as the same.
func Same(s, t []float64) bool {
if len(s) != len(t) {
return false
}
for i, v := range s {
w := t[i]
if v != w && !(math.IsNaN(v) && math.IsNaN(w)) {
return false
}
}
return true
}
// Scale multiplies every element in dst by the scalar c.
func Scale(c float64, dst []float64) {
if len(dst) > 0 {
f64.ScalUnitary(c, dst)
}
}
// Span returns a set of N equally spaced points between l and u, where N
// is equal to the length of the destination. The first element of the destination
// is l, the final element of the destination is u.
//
// Panics if len(dst) < 2.
//
// Span also returns the mutated slice dst, so that it can be used in range expressions,
// like:
//
// for i, x := range Span(dst, l, u) { ... }
func Span(dst []float64, l, u float64) []float64 {
n := len(dst)
if n < 2 {
panic("floats: destination must have length >1")
}
// Special cases for Inf and NaN.
switch {
case math.IsNaN(l):
for i := range dst[:len(dst)-1] {
dst[i] = math.NaN()
}
dst[len(dst)-1] = u
return dst
case math.IsNaN(u):
for i := range dst[1:] {
dst[i+1] = math.NaN()
}
dst[0] = l
return dst
case math.IsInf(l, 0) && math.IsInf(u, 0):
for i := range dst[:len(dst)/2] {
dst[i] = l
dst[len(dst)-i-1] = u
}
if len(dst)%2 == 1 {
if l != u {
dst[len(dst)/2] = 0
} else {
dst[len(dst)/2] = l
}
}
return dst
case math.IsInf(l, 0):
for i := range dst[:len(dst)-1] {
dst[i] = l
}
dst[len(dst)-1] = u
return dst
case math.IsInf(u, 0):
for i := range dst[1:] {
dst[i+1] = u
}
dst[0] = l
return dst
}
step := (u - l) / float64(n-1)
for i := range dst {
dst[i] = l + step*float64(i)
}
return dst
}
// Sub subtracts, element-wise, the elements of s from dst. Panics if
// the lengths of dst and s do not match.
func Sub(dst, s []float64) {
if len(dst) != len(s) {
panic("floats: length of the slices do not match")
}
f64.AxpyUnitaryTo(dst, -1, s, dst)
}
// SubTo subtracts, element-wise, the elements of t from s and
// stores the result in dst. Panics if the lengths of s, t and dst do not match.
func SubTo(dst, s, t []float64) []float64 {
if len(s) != len(t) {
panic("floats: length of subtractor and subtractee do not match")
}
if len(dst) != len(s) {
panic("floats: length of destination does not match length of subtractor")
}
f64.AxpyUnitaryTo(dst, -1, t, s)
return dst
}
// Sum returns the sum of the elements of the slice.
func Sum(s []float64) float64 {
var sum float64
for _, val := range s {
sum += val
}
return sum
}
// Within returns the first index i where s[i] <= v < s[i+1]. Within panics if:
// - len(s) < 2
// - s is not sorted
func Within(s []float64, v float64) int {
if len(s) < 2 {
panic("floats: slice length less than 2")
}
if !sort.Float64sAreSorted(s) {
panic("floats: input slice not sorted")
}
if v < s[0] || v >= s[len(s)-1] || math.IsNaN(v) {
return -1
}
for i, f := range s[1:] {
if v < f {
return i
}
}
return -1
}
@@ -0,0 +1 @@
test.out
@@ -0,0 +1,3 @@
# Gonum graph [![GoDoc](https://godoc.org/gonum.org/v1/gonum/graph?status.svg)](https://godoc.org/gonum.org/v1/gonum/graph)
This is a generalized graph package for the Go language.
+6
View File
@@ -0,0 +1,6 @@
// Copyright ©2014 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package graph defines graph interfaces.
package graph
+253
View File
@@ -0,0 +1,253 @@
// Copyright ©2014 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package graph
// Node is a graph node. It returns a graph-unique integer ID.
type Node interface {
ID() int64
}
// Edge is a graph edge. In directed graphs, the direction of the
// edge is given from -> to, otherwise the edge is semantically
// unordered.
type Edge interface {
From() Node
To() Node
}
// WeightedEdge is a weighted graph edge. In directed graphs, the direction
// of the edge is given from -> to, otherwise the edge is semantically
// unordered.
type WeightedEdge interface {
Edge
Weight() float64
}
// Graph is a generalized graph.
type Graph interface {
// Has returns whether a node with the given ID exists
// within the graph.
Has(id int64) bool
// Nodes returns all the nodes in the graph.
Nodes() []Node
// From returns all nodes that can be reached directly
// from the node with the given ID.
From(id int64) []Node
// HasEdgeBetween returns whether an edge exists between
// nodes with IDs xid and yid without considering direction.
HasEdgeBetween(xid, yid int64) bool
// Edge returns the edge from u to v, with IDs uid and vid,
// if such an edge exists and nil otherwise. The node v
// must be directly reachable from u as defined by the
// From method.
Edge(uid, vid int64) Edge
}
// Weighted is a weighted graph.
type Weighted interface {
Graph
// WeightedEdge returns the weighted edge from u to v
// with IDs uid and vid if such an edge exists and
// nil otherwise. The node v must be directly
// reachable from u as defined by the From method.
WeightedEdge(uid, vid int64) WeightedEdge
// Weight returns the weight for the edge between
// x and y with IDs xid and yid if Edge(xid, yid)
// returns a non-nil Edge.
// If x and y are the same node or there is no
// joining edge between the two nodes the weight
// value returned is implementation dependent.
// Weight returns true if an edge exists between
// x and y or if x and y have the same ID, false
// otherwise.
Weight(xid, yid int64) (w float64, ok bool)
}
// Undirected is an undirected graph.
type Undirected interface {
Graph
// EdgeBetween returns the edge between nodes x and y
// with IDs xid and yid.
EdgeBetween(xid, yid int64) Edge
}
// WeightedUndirected is a weighted undirected graph.
type WeightedUndirected interface {
Weighted
// WeightedEdgeBetween returns the edge between nodes
// x and y with IDs xid and yid.
WeightedEdgeBetween(xid, yid int64) WeightedEdge
}
// Directed is a directed graph.
type Directed interface {
Graph
// HasEdgeFromTo returns whether an edge exists
// in the graph from u to v with IDs uid and vid.
HasEdgeFromTo(uid, vid int64) bool
// To returns all nodes that can reach directly
// to the node with the given ID.
To(id int64) []Node
}
// WeightedDirected is a weighted directed graph.
type WeightedDirected interface {
Weighted
// HasEdgeFromTo returns whether an edge exists
// in the graph from u to v with the IDs uid and
// vid.
HasEdgeFromTo(uid, vid int64) bool
// To returns all nodes that can reach directly
// to the node with the given ID.
To(id int64) []Node
}
// NodeAdder is an interface for adding arbitrary nodes to a graph.
type NodeAdder interface {
// NewNode returns a new Node with a unique
// arbitrary ID.
NewNode() Node
// Adds a node to the graph. AddNode panics if
// the added node ID matches an existing node ID.
AddNode(Node)
}
// NodeRemover is an interface for removing nodes from a graph.
type NodeRemover interface {
// RemoveNode removes the node with the given ID
// from the graph, as well as any edges attached
// to it. If the node is not in the graph it is
// a no-op.
RemoveNode(id int64)
}
// EdgeAdder is an interface for adding edges to a graph.
type EdgeAdder interface {
// NewEdge returns a new Edge from the source to the destination node.
NewEdge(from, to Node) Edge
// SetEdge adds an edge from one node to another.
// If the graph supports node addition the nodes
// will be added if they do not exist, otherwise
// SetEdge will panic.
// The behavior of an EdgeAdder when the IDs
// returned by e.From and e.To are equal is
// implementation-dependent.
SetEdge(e Edge)
}
// WeightedEdgeAdder is an interface for adding edges to a graph.
type WeightedEdgeAdder interface {
// NewWeightedEdge returns a new WeightedEdge from
// the source to the destination node.
NewWeightedEdge(from, to Node, weight float64) WeightedEdge
// SetWeightedEdge adds an edge from one node to
// another. If the graph supports node addition
// the nodes will be added if they do not exist,
// otherwise SetWeightedEdge will panic.
// The behavior of a WeightedEdgeAdder when the IDs
// returned by e.From and e.To are equal is
// implementation-dependent.
SetWeightedEdge(e WeightedEdge)
}
// EdgeRemover is an interface for removing nodes from a graph.
type EdgeRemover interface {
// RemoveEdge removes the edge with the given end
// IDs, leaving the terminal nodes. If the edge
// does not exist it is a no-op.
RemoveEdge(fid, tid int64)
}
// Builder is a graph that can have nodes and edges added.
type Builder interface {
NodeAdder
EdgeAdder
}
// WeightedBuilder is a graph that can have nodes and weighted edges added.
type WeightedBuilder interface {
NodeAdder
WeightedEdgeAdder
}
// UndirectedBuilder is an undirected graph builder.
type UndirectedBuilder interface {
Undirected
Builder
}
// UndirectedWeightedBuilder is an undirected weighted graph builder.
type UndirectedWeightedBuilder interface {
Undirected
WeightedBuilder
}
// DirectedBuilder is a directed graph builder.
type DirectedBuilder interface {
Directed
Builder
}
// DirectedWeightedBuilder is a directed weighted graph builder.
type DirectedWeightedBuilder interface {
Directed
WeightedBuilder
}
// Copy copies nodes and edges as undirected edges from the source to the destination
// without first clearing the destination. Copy will panic if a node ID in the source
// graph matches a node ID in the destination.
//
// If the source is undirected and the destination is directed both directions will
// be present in the destination after the copy is complete.
func Copy(dst Builder, src Graph) {
nodes := src.Nodes()
for _, n := range nodes {
dst.AddNode(n)
}
for _, u := range nodes {
for _, v := range src.From(u.ID()) {
dst.SetEdge(dst.NewEdge(u, v))
}
}
}
// CopyWeighted copies nodes and edges as undirected edges from the source to the destination
// without first clearing the destination. Copy will panic if a node ID in the source
// graph matches a node ID in the destination.
//
// If the source is undirected and the destination is directed both directions will
// be present in the destination after the copy is complete.
//
// If the source is a directed graph, the destination is undirected, and a fundamental
// cycle exists with two nodes where the edge weights differ, the resulting destination
// graph's edge weight between those nodes is undefined. If there is a defined function
// to resolve such conflicts, an UndirectWeighted may be used to do this.
func CopyWeighted(dst WeightedBuilder, src Weighted) {
nodes := src.Nodes()
for _, n := range nodes {
dst.AddNode(n)
}
for _, u := range nodes {
for _, v := range src.From(u.ID()) {
dst.SetWeightedEdge(dst.NewWeightedEdge(u, v, src.WeightedEdge(u.ID(), v.ID()).Weight()))
}
}
}
@@ -0,0 +1,6 @@
// Copyright ©2017 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package linear provides common linear data structures.
package linear
@@ -0,0 +1,73 @@
// Copyright ©2015 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package linear
import (
"gonum.org/v1/gonum/graph"
)
// NodeStack implements a LIFO stack of graph.Node.
type NodeStack []graph.Node
// Len returns the number of graph.Nodes on the stack.
func (s *NodeStack) Len() int { return len(*s) }
// Pop returns the last graph.Node on the stack and removes it
// from the stack.
func (s *NodeStack) Pop() graph.Node {
v := *s
v, n := v[:len(v)-1], v[len(v)-1]
*s = v
return n
}
// Push adds the node n to the stack at the last position.
func (s *NodeStack) Push(n graph.Node) { *s = append(*s, n) }
// NodeQueue implements a FIFO queue.
type NodeQueue struct {
head int
data []graph.Node
}
// Len returns the number of graph.Nodes in the queue.
func (q *NodeQueue) Len() int { return len(q.data) - q.head }
// Enqueue adds the node n to the back of the queue.
func (q *NodeQueue) Enqueue(n graph.Node) {
if len(q.data) == cap(q.data) && q.head > 0 {
l := q.Len()
copy(q.data, q.data[q.head:])
q.head = 0
q.data = append(q.data[:l], n)
} else {
q.data = append(q.data, n)
}
}
// Dequeue returns the graph.Node at the front of the queue and
// removes it from the queue.
func (q *NodeQueue) Dequeue() graph.Node {
if q.Len() == 0 {
panic("queue: empty queue")
}
var n graph.Node
n, q.data[q.head] = q.data[q.head], nil
q.head++
if q.Len() == 0 {
q.head = 0
q.data = q.data[:0]
}
return n
}
// Reset clears the queue for reuse.
func (q *NodeQueue) Reset() {
q.head = 0
q.data = q.data[:0]
}
@@ -0,0 +1,6 @@
// Copyright ©2017 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package ordered provides common sort ordering types.
package ordered
@@ -0,0 +1,76 @@
// Copyright ©2015 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package ordered
import "gonum.org/v1/gonum/graph"
// ByID implements the sort.Interface sorting a slice of graph.Node
// by ID.
type ByID []graph.Node
func (n ByID) Len() int { return len(n) }
func (n ByID) Less(i, j int) bool { return n[i].ID() < n[j].ID() }
func (n ByID) Swap(i, j int) { n[i], n[j] = n[j], n[i] }
// BySliceValues implements the sort.Interface sorting a slice of
// []int64 lexically by the values of the []int64.
type BySliceValues [][]int64
func (c BySliceValues) Len() int { return len(c) }
func (c BySliceValues) Less(i, j int) bool {
a, b := c[i], c[j]
l := len(a)
if len(b) < l {
l = len(b)
}
for k, v := range a[:l] {
if v < b[k] {
return true
}
if v > b[k] {
return false
}
}
return len(a) < len(b)
}
func (c BySliceValues) Swap(i, j int) { c[i], c[j] = c[j], c[i] }
// BySliceIDs implements the sort.Interface sorting a slice of
// []graph.Node lexically by the IDs of the []graph.Node.
type BySliceIDs [][]graph.Node
func (c BySliceIDs) Len() int { return len(c) }
func (c BySliceIDs) Less(i, j int) bool {
a, b := c[i], c[j]
l := len(a)
if len(b) < l {
l = len(b)
}
for k, v := range a[:l] {
if v.ID() < b[k].ID() {
return true
}
if v.ID() > b[k].ID() {
return false
}
}
return len(a) < len(b)
}
func (c BySliceIDs) Swap(i, j int) { c[i], c[j] = c[j], c[i] }
// Int64s implements the sort.Interface sorting a slice of
// int64.
type Int64s []int64
func (s Int64s) Len() int { return len(s) }
func (s Int64s) Less(i, j int) bool { return s[i] < s[j] }
func (s Int64s) Swap(i, j int) { s[i], s[j] = s[j], s[i] }
// Reverse reverses the order of nodes.
func Reverse(nodes []graph.Node) {
for i, j := 0, len(nodes)-1; i < j; i, j = i+1, j-1 {
nodes[i], nodes[j] = nodes[j], nodes[i]
}
}
@@ -0,0 +1,6 @@
// Copyright ©2017 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package set provides integer and graph.Node sets.
package set
@@ -0,0 +1,36 @@
// Copyright ©2014 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build !appengine,!safe
package set
import "unsafe"
// same determines whether two sets are backed by the same store. In the
// current implementation using hash maps it makes use of the fact that
// hash maps are passed as a pointer to a runtime Hmap struct. A map is
// not seen by the runtime as a pointer though, so we use unsafe to get
// the maps' pointer values to compare.
func same(a, b Nodes) bool {
return *(*uintptr)(unsafe.Pointer(&a)) == *(*uintptr)(unsafe.Pointer(&b))
}
// intsSame determines whether two sets are backed by the same store. In the
// current implementation using hash maps it makes use of the fact that
// hash maps are passed as a pointer to a runtime Hmap struct. A map is
// not seen by the runtime as a pointer though, so we use unsafe to get
// the maps' pointer values to compare.
func intsSame(a, b Ints) bool {
return *(*uintptr)(unsafe.Pointer(&a)) == *(*uintptr)(unsafe.Pointer(&b))
}
// int64sSame determines whether two sets are backed by the same store. In the
// current implementation using hash maps it makes use of the fact that
// hash maps are passed as a pointer to a runtime Hmap struct. A map is
// not seen by the runtime as a pointer though, so we use unsafe to get
// the maps' pointer values to compare.
func int64sSame(a, b Int64s) bool {
return *(*uintptr)(unsafe.Pointer(&a)) == *(*uintptr)(unsafe.Pointer(&b))
}
@@ -0,0 +1,36 @@
// Copyright ©2014 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build appengine safe
package set
import "reflect"
// same determines whether two sets are backed by the same store. In the
// current implementation using hash maps it makes use of the fact that
// hash maps are passed as a pointer to a runtime Hmap struct. A map is
// not seen by the runtime as a pointer though, so we use reflect to get
// the maps' pointer values to compare.
func same(a, b Nodes) bool {
return reflect.ValueOf(a).Pointer() == reflect.ValueOf(b).Pointer()
}
// intsSame determines whether two sets are backed by the same store. In the
// current implementation using hash maps it makes use of the fact that
// hash maps are passed as a pointer to a runtime Hmap struct. A map is
// not seen by the runtime as a pointer though, so we use reflect to get
// the maps' pointer values to compare.
func intsSame(a, b Ints) bool {
return reflect.ValueOf(a).Pointer() == reflect.ValueOf(b).Pointer()
}
// int64sSame determines whether two sets are backed by the same store. In the
// current implementation using hash maps it makes use of the fact that
// hash maps are passed as a pointer to a runtime Hmap struct. A map is
// not seen by the runtime as a pointer though, so we use reflect to get
// the maps' pointer values to compare.
func int64sSame(a, b Int64s) bool {
return reflect.ValueOf(a).Pointer() == reflect.ValueOf(b).Pointer()
}
@@ -0,0 +1,256 @@
// Copyright ©2014 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package set
import "gonum.org/v1/gonum/graph"
// Ints is a set of int identifiers.
type Ints map[int]struct{}
// The simple accessor methods for Ints are provided to allow ease of
// implementation change should the need arise.
// Add inserts an element into the set.
func (s Ints) Add(e int) {
s[e] = struct{}{}
}
// Has reports the existence of the element in the set.
func (s Ints) Has(e int) bool {
_, ok := s[e]
return ok
}
// Remove deletes the specified element from the set.
func (s Ints) Remove(e int) {
delete(s, e)
}
// Count reports the number of elements stored in the set.
func (s Ints) Count() int {
return len(s)
}
// IntsEqual reports set equality between the parameters. Sets are equal if
// and only if they have the same elements.
func IntsEqual(a, b Ints) bool {
if intsSame(a, b) {
return true
}
if len(a) != len(b) {
return false
}
for e := range a {
if _, ok := b[e]; !ok {
return false
}
}
return true
}
// Int64s is a set of int64 identifiers.
type Int64s map[int64]struct{}
// The simple accessor methods for Ints are provided to allow ease of
// implementation change should the need arise.
// Add inserts an element into the set.
func (s Int64s) Add(e int64) {
s[e] = struct{}{}
}
// Has reports the existence of the element in the set.
func (s Int64s) Has(e int64) bool {
_, ok := s[e]
return ok
}
// Remove deletes the specified element from the set.
func (s Int64s) Remove(e int64) {
delete(s, e)
}
// Count reports the number of elements stored in the set.
func (s Int64s) Count() int {
return len(s)
}
// Int64sEqual reports set equality between the parameters. Sets are equal if
// and only if they have the same elements.
func Int64sEqual(a, b Int64s) bool {
if int64sSame(a, b) {
return true
}
if len(a) != len(b) {
return false
}
for e := range a {
if _, ok := b[e]; !ok {
return false
}
}
return true
}
// Nodes is a set of nodes keyed in their integer identifiers.
type Nodes map[int64]graph.Node
// The simple accessor methods for Nodes are provided to allow ease of
// implementation change should the need arise.
// Add inserts an element into the set.
func (s Nodes) Add(n graph.Node) {
s[n.ID()] = n
}
// Remove deletes the specified element from the set.
func (s Nodes) Remove(e graph.Node) {
delete(s, e.ID())
}
// Has reports the existence of the element in the set.
func (s Nodes) Has(n graph.Node) bool {
_, ok := s[n.ID()]
return ok
}
// clear clears the set, possibly using the same backing store.
func (s *Nodes) clear() {
if len(*s) != 0 {
*s = make(Nodes)
}
}
// Copy performs a perfect copy from src to dst (meaning the sets will
// be equal).
func (dst Nodes) Copy(src Nodes) Nodes {
if same(src, dst) {
return dst
}
if len(dst) > 0 {
dst = make(Nodes, len(src))
}
for e, n := range src {
dst[e] = n
}
return dst
}
// Equal reports set equality between the parameters. Sets are equal if
// and only if they have the same elements.
func Equal(a, b Nodes) bool {
if same(a, b) {
return true
}
if len(a) != len(b) {
return false
}
for e := range a {
if _, ok := b[e]; !ok {
return false
}
}
return true
}
// Union takes the union of a and b, and stores it in dst.
//
// The union of two sets, a and b, is the set containing all the
// elements of each, for instance:
//
// {a,b,c} UNION {d,e,f} = {a,b,c,d,e,f}
//
// Since sets may not have repetition, unions of two sets that overlap
// do not contain repeat elements, that is:
//
// {a,b,c} UNION {b,c,d} = {a,b,c,d}
//
func (dst Nodes) Union(a, b Nodes) Nodes {
if same(a, b) {
return dst.Copy(a)
}
if !same(a, dst) && !same(b, dst) {
dst.clear()
}
if !same(dst, a) {
for e, n := range a {
dst[e] = n
}
}
if !same(dst, b) {
for e, n := range b {
dst[e] = n
}
}
return dst
}
// Intersect takes the intersection of a and b, and stores it in dst.
//
// The intersection of two sets, a and b, is the set containing all
// the elements shared between the two sets, for instance:
//
// {a,b,c} INTERSECT {b,c,d} = {b,c}
//
// The intersection between a set and itself is itself, and thus
// effectively a copy operation:
//
// {a,b,c} INTERSECT {a,b,c} = {a,b,c}
//
// The intersection between two sets that share no elements is the empty
// set:
//
// {a,b,c} INTERSECT {d,e,f} = {}
//
func (dst Nodes) Intersect(a, b Nodes) Nodes {
var swap Nodes
if same(a, b) {
return dst.Copy(a)
}
if same(a, dst) {
swap = b
} else if same(b, dst) {
swap = a
} else {
dst.clear()
if len(a) > len(b) {
a, b = b, a
}
for e, n := range a {
if _, ok := b[e]; ok {
dst[e] = n
}
}
return dst
}
for e := range dst {
if _, ok := swap[e]; !ok {
delete(dst, e)
}
}
return dst
}
@@ -0,0 +1,54 @@
// Copyright ©2014 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package uid implements unique ID provision for graphs.
package uid
import "gonum.org/v1/gonum/graph/internal/set"
// Max is the maximum value of int64.
const Max = int64(^uint64(0) >> 1)
// Set implements available ID storage.
type Set struct {
maxID int64
used, free set.Int64s
}
// NewSet returns a new Set. The returned value should not be passed except by pointer.
func NewSet() Set {
return Set{maxID: -1, used: make(set.Int64s), free: make(set.Int64s)}
}
// NewID returns a new unique ID. The ID returned is not considered used
// until passed in a call to use.
func (s *Set) NewID() int64 {
for id := range s.free {
return id
}
if s.maxID != Max {
return s.maxID + 1
}
for id := int64(0); id <= s.maxID+1; id++ {
if !s.used.Has(id) {
return id
}
}
panic("unreachable")
}
// Use adds the id to the used IDs in the Set.
func (s *Set) Use(id int64) {
s.used.Add(id)
s.free.Remove(id)
if id > s.maxID {
s.maxID = id
}
}
// Release frees the id for reuse.
func (s *Set) Release(id int64) {
s.free.Add(id)
s.used.Remove(id)
}
@@ -0,0 +1,168 @@
// Copyright ©2014 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package graph
// Line is an edge in a multigraph. A Line returns an ID that must
// distinguish Lines sharing Node end points.
type Line interface {
Edge
ID() int64
}
// WeightedLine is a weighted multigraph edge.
type WeightedLine interface {
Line
Weight() float64
}
// Multigraph is a generalized multigraph.
type Multigraph interface {
// Has returns whether the node with the given ID exists
// within the multigraph.
Has(id int64) bool
// Nodes returns all the nodes in the multigraph.
Nodes() []Node
// From returns all nodes that can be reached directly
// from the node with the given ID.
From(id int64) []Node
// HasEdgeBetween returns whether an edge exists between
// nodes with IDs xid and yid without considering direction.
HasEdgeBetween(xid, yid int64) bool
// Lines returns the lines from u to v, with IDs uid and
// vid, if any such lines exist and nil otherwise. The
// node v must be directly reachable from u as defined by
// the From method.
Lines(uid, vid int64) []Line
}
// WeightedMultigraph is a weighted multigraph.
type WeightedMultigraph interface {
Multigraph
// WeightedLines returns the weighted lines from u to v
// with IDs uid and vid if any such lines exist and nil
// otherwise. The node v must be directly reachable
// from u as defined by the From method.
WeightedLines(uid, vid int64) []WeightedLine
}
// UndirectedMultigraph is an undirected multigraph.
type UndirectedMultigraph interface {
Multigraph
// LinesBetween returns the lines between nodes x and y
// with IDs xid and yid.
LinesBetween(xid, yid int64) []Line
}
// WeightedUndirectedMultigraph is a weighted undirected multigraph.
type WeightedUndirectedMultigraph interface {
WeightedMultigraph
// WeightedLinesBetween returns the lines between nodes
// x and y with IDs xid and yid.
WeightedLinesBetween(xid, yid int64) []WeightedLine
}
// DirectedMultigraph is a directed multigraph.
type DirectedMultigraph interface {
Multigraph
// HasEdgeFromTo returns whether an edge exists
// in the multigraph from u to v with IDs uid
// and vid.
HasEdgeFromTo(uid, vid int64) bool
// To returns all nodes that can reach directly
// to the node with the given ID.
To(id int64) []Node
}
// WeightedDirectedMultigraph is a weighted directed multigraph.
type WeightedDirectedMultigraph interface {
WeightedMultigraph
// HasEdgeFromTo returns whether an edge exists
// in the multigraph from u to v with IDs uid
// and vid.
HasEdgeFromTo(uid, vid int64) bool
// To returns all nodes that can reach directly
// to the node with the given ID.
To(id int64) []Node
}
// LineAdder is an interface for adding lines to a multigraph.
type LineAdder interface {
// NewLine returns a new Line from the source to the destination node.
NewLine(from, to Node) Line
// SetLine adds a Line from one node to another.
// If the multigraph supports node addition the nodes
// will be added if they do not exist, otherwise
// SetLine will panic.
SetLine(l Line)
}
// WeightedLineAdder is an interface for adding lines to a multigraph.
type WeightedLineAdder interface {
// NewWeightedLine returns a new WeightedLine from
// the source to the destination node.
NewWeightedLine(from, to Node, weight float64) WeightedLine
// SetWeightedLine adds a weighted line from one node
// to another. If the multigraph supports node addition
// the nodes will be added if they do not exist,
// otherwise SetWeightedLine will panic.
SetWeightedLine(e WeightedLine)
}
// LineRemover is an interface for removing lines from a multigraph.
type LineRemover interface {
// RemoveLine removes the line with the given end
// and line IDs, leaving the terminal nodes. If
// the line does not exist it is a no-op.
RemoveLine(fid, tid, id int64)
}
// MultigraphBuilder is a multigraph that can have nodes and lines added.
type MultigraphBuilder interface {
NodeAdder
LineAdder
}
// WeightedMultigraphBuilder is a multigraph that can have nodes and weighted lines added.
type WeightedMultigraphBuilder interface {
NodeAdder
WeightedLineAdder
}
// UndirectedMultgraphBuilder is an undirected multigraph builder.
type UndirectedMultigraphBuilder interface {
UndirectedMultigraph
MultigraphBuilder
}
// UndirectedWeightedMultigraphBuilder is an undirected weighted multigraph builder.
type UndirectedWeightedMultigraphBuilder interface {
UndirectedMultigraph
WeightedMultigraphBuilder
}
// DirectedMultigraphBuilder is a directed multigraph builder.
type DirectedMultigraphBuilder interface {
DirectedMultigraph
MultigraphBuilder
}
// DirectedWeightedMultigraphBuilder is a directed weighted multigraph builder.
type DirectedWeightedMultigraphBuilder interface {
DirectedMultigraph
WeightedMultigraphBuilder
}
@@ -0,0 +1,289 @@
// Copyright ©2014 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package simple
import (
"sort"
"gonum.org/v1/gonum/graph"
"gonum.org/v1/gonum/graph/internal/ordered"
"gonum.org/v1/gonum/mat"
)
// DirectedMatrix represents a directed graph using an adjacency
// matrix such that all IDs are in a contiguous block from 0 to n-1.
// Edges are stored implicitly as an edge weight, so edges stored in
// the graph are not recoverable.
type DirectedMatrix struct {
mat *mat.Dense
nodes []graph.Node
self float64
absent float64
}
// NewDirectedMatrix creates a directed dense graph with n nodes.
// All edges are initialized with the weight given by init. The self parameter
// specifies the cost of self connection, and absent specifies the weight
// returned for absent edges.
func NewDirectedMatrix(n int, init, self, absent float64) *DirectedMatrix {
matrix := make([]float64, n*n)
if init != 0 {
for i := range matrix {
matrix[i] = init
}
}
for i := 0; i < len(matrix); i += n + 1 {
matrix[i] = self
}
return &DirectedMatrix{
mat: mat.NewDense(n, n, matrix),
self: self,
absent: absent,
}
}
// NewDirectedMatrixFrom creates a directed dense graph with the given nodes.
// The IDs of the nodes must be contiguous from 0 to len(nodes)-1, but may
// be in any order. If IDs are not contiguous NewDirectedMatrixFrom will panic.
// All edges are initialized with the weight given by init. The self parameter
// specifies the cost of self connection, and absent specifies the weight
// returned for absent edges.
func NewDirectedMatrixFrom(nodes []graph.Node, init, self, absent float64) *DirectedMatrix {
sort.Sort(ordered.ByID(nodes))
for i, n := range nodes {
if int64(i) != n.ID() {
panic("simple: non-contiguous node IDs")
}
}
g := NewDirectedMatrix(len(nodes), init, self, absent)
g.nodes = nodes
return g
}
// Node returns the node in the graph with the given ID.
func (g *DirectedMatrix) Node(id int64) graph.Node {
if !g.has(id) {
return nil
}
if g.nodes == nil {
return Node(id)
}
return g.nodes[id]
}
// Has returns whether the node exists within the graph.
func (g *DirectedMatrix) Has(id int64) bool {
return g.has(id)
}
func (g *DirectedMatrix) has(id int64) bool {
r, _ := g.mat.Dims()
return 0 <= id && id < int64(r)
}
// Nodes returns all the nodes in the graph.
func (g *DirectedMatrix) Nodes() []graph.Node {
if g.nodes != nil {
nodes := make([]graph.Node, len(g.nodes))
copy(nodes, g.nodes)
return nodes
}
r, _ := g.mat.Dims()
nodes := make([]graph.Node, r)
for i := 0; i < r; i++ {
nodes[i] = Node(i)
}
return nodes
}
// Edges returns all the edges in the graph.
func (g *DirectedMatrix) Edges() []graph.Edge {
var edges []graph.Edge
r, _ := g.mat.Dims()
for i := 0; i < r; i++ {
for j := 0; j < r; j++ {
if i == j {
continue
}
if w := g.mat.At(i, j); !isSame(w, g.absent) {
edges = append(edges, WeightedEdge{F: g.Node(int64(i)), T: g.Node(int64(j)), W: w})
}
}
}
return edges
}
// From returns all nodes in g that can be reached directly from n.
func (g *DirectedMatrix) From(id int64) []graph.Node {
if !g.has(id) {
return nil
}
var neighbors []graph.Node
_, c := g.mat.Dims()
for j := 0; j < c; j++ {
if int64(j) == id {
continue
}
// id is not greater than maximum int by this point.
if !isSame(g.mat.At(int(id), j), g.absent) {
neighbors = append(neighbors, g.Node(int64(j)))
}
}
return neighbors
}
// To returns all nodes in g that can reach directly to n.
func (g *DirectedMatrix) To(id int64) []graph.Node {
if !g.has(id) {
return nil
}
var neighbors []graph.Node
r, _ := g.mat.Dims()
for i := 0; i < r; i++ {
if int64(i) == id {
continue
}
// id is not greater than maximum int by this point.
if !isSame(g.mat.At(i, int(id)), g.absent) {
neighbors = append(neighbors, g.Node(int64(i)))
}
}
return neighbors
}
// HasEdgeBetween returns whether an edge exists between nodes x and y without
// considering direction.
func (g *DirectedMatrix) HasEdgeBetween(xid, yid int64) bool {
if !g.has(xid) {
return false
}
if !g.has(yid) {
return false
}
// xid and yid are not greater than maximum int by this point.
return xid != yid && (!isSame(g.mat.At(int(xid), int(yid)), g.absent) || !isSame(g.mat.At(int(yid), int(xid)), g.absent))
}
// Edge returns the edge from u to v if such an edge exists and nil otherwise.
// The node v must be directly reachable from u as defined by the From method.
func (g *DirectedMatrix) Edge(uid, vid int64) graph.Edge {
return g.WeightedEdge(uid, vid)
}
// WeightedEdge returns the weighted edge from u to v if such an edge exists and nil otherwise.
// The node v must be directly reachable from u as defined by the From method.
func (g *DirectedMatrix) WeightedEdge(uid, vid int64) graph.WeightedEdge {
if g.HasEdgeFromTo(uid, vid) {
// xid and yid are not greater than maximum int by this point.
return WeightedEdge{F: g.Node(uid), T: g.Node(vid), W: g.mat.At(int(uid), int(vid))}
}
return nil
}
// HasEdgeFromTo returns whether an edge exists in the graph from u to v.
func (g *DirectedMatrix) HasEdgeFromTo(uid, vid int64) bool {
if !g.has(uid) {
return false
}
if !g.has(vid) {
return false
}
// uid and vid are not greater than maximum int by this point.
return uid != vid && !isSame(g.mat.At(int(uid), int(vid)), g.absent)
}
// Weight returns the weight for the edge between x and y if Edge(x, y) returns a non-nil Edge.
// If x and y are the same node or there is no joining edge between the two nodes the weight
// value returned is either the graph's absent or self value. Weight returns true if an edge
// exists between x and y or if x and y have the same ID, false otherwise.
func (g *DirectedMatrix) Weight(xid, yid int64) (w float64, ok bool) {
if xid == yid {
return g.self, true
}
if g.has(xid) && g.has(yid) {
// xid and yid are not greater than maximum int by this point.
return g.mat.At(int(xid), int(yid)), true
}
return g.absent, false
}
// SetEdge sets e, an edge from one node to another with unit weight. If the ends of the edge
// are not in g or the edge is a self loop, SetEdge panics.
func (g *DirectedMatrix) SetEdge(e graph.Edge) {
g.setWeightedEdge(e, 1)
}
// SetWeightedEdge sets e, an edge from one node to another. If the ends of the edge are not in g
// or the edge is a self loop, SetWeightedEdge panics.
func (g *DirectedMatrix) SetWeightedEdge(e graph.WeightedEdge) {
g.setWeightedEdge(e, e.Weight())
}
func (g *DirectedMatrix) setWeightedEdge(e graph.Edge, weight float64) {
fid := e.From().ID()
tid := e.To().ID()
if fid == tid {
panic("simple: set illegal edge")
}
if int64(int(fid)) != fid {
panic("simple: unavailable from node ID for dense graph")
}
if int64(int(tid)) != tid {
panic("simple: unavailable to node ID for dense graph")
}
// fid and tid are not greater than maximum int by this point.
g.mat.Set(int(fid), int(tid), weight)
}
// RemoveEdge removes the edge with the given end point nodes from the graph, leaving the terminal
// nodes. If the edge does not exist it is a no-op.
func (g *DirectedMatrix) RemoveEdge(fid, tid int64) {
if !g.has(fid) {
return
}
if !g.has(tid) {
return
}
// fid and tid are not greater than maximum int by this point.
g.mat.Set(int(fid), int(tid), g.absent)
}
// Degree returns the in+out degree of n in g.
func (g *DirectedMatrix) Degree(id int64) int {
if !g.has(id) {
return 0
}
var deg int
r, c := g.mat.Dims()
for i := 0; i < r; i++ {
if int64(i) == id {
continue
}
// id is not greater than maximum int by this point.
if !isSame(g.mat.At(int(id), i), g.absent) {
deg++
}
}
for i := 0; i < c; i++ {
if int64(i) == id {
continue
}
// id is not greater than maximum int by this point.
if !isSame(g.mat.At(i, int(id)), g.absent) {
deg++
}
}
return deg
}
// Matrix returns the mat.Matrix representation of the graph. The orientation
// of the matrix is such that the matrix entry at G_{ij} is the weight of the edge
// from node i to node j.
func (g *DirectedMatrix) Matrix() mat.Matrix {
// Prevent alteration of dimensions of the returned matrix.
m := *g.mat
return &m
}
@@ -0,0 +1,253 @@
// Copyright ©2014 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package simple
import (
"sort"
"gonum.org/v1/gonum/graph"
"gonum.org/v1/gonum/graph/internal/ordered"
"gonum.org/v1/gonum/mat"
)
// UndirectedMatrix represents an undirected graph using an adjacency
// matrix such that all IDs are in a contiguous block from 0 to n-1.
// Edges are stored implicitly as an edge weight, so edges stored in
// the graph are not recoverable.
type UndirectedMatrix struct {
mat *mat.SymDense
nodes []graph.Node
self float64
absent float64
}
// NewUndirectedMatrix creates an undirected dense graph with n nodes.
// All edges are initialized with the weight given by init. The self parameter
// specifies the cost of self connection, and absent specifies the weight
// returned for absent edges.
func NewUndirectedMatrix(n int, init, self, absent float64) *UndirectedMatrix {
matrix := make([]float64, n*n)
if init != 0 {
for i := range matrix {
matrix[i] = init
}
}
for i := 0; i < len(matrix); i += n + 1 {
matrix[i] = self
}
return &UndirectedMatrix{
mat: mat.NewSymDense(n, matrix),
self: self,
absent: absent,
}
}
// NewUndirectedMatrixFrom creates an undirected dense graph with the given nodes.
// The IDs of the nodes must be contiguous from 0 to len(nodes)-1, but may
// be in any order. If IDs are not contiguous NewUndirectedMatrixFrom will panic.
// All edges are initialized with the weight given by init. The self parameter
// specifies the cost of self connection, and absent specifies the weight
// returned for absent edges.
func NewUndirectedMatrixFrom(nodes []graph.Node, init, self, absent float64) *UndirectedMatrix {
sort.Sort(ordered.ByID(nodes))
for i, n := range nodes {
if int64(i) != n.ID() {
panic("simple: non-contiguous node IDs")
}
}
g := NewUndirectedMatrix(len(nodes), init, self, absent)
g.nodes = nodes
return g
}
// Node returns the node in the graph with the given ID.
func (g *UndirectedMatrix) Node(id int64) graph.Node {
if !g.has(id) {
return nil
}
if g.nodes == nil {
return Node(id)
}
return g.nodes[id]
}
// Has returns whether the node exists within the graph.
func (g *UndirectedMatrix) Has(id int64) bool {
return g.has(id)
}
func (g *UndirectedMatrix) has(id int64) bool {
r := g.mat.Symmetric()
return 0 <= id && id < int64(r)
}
// Nodes returns all the nodes in the graph.
func (g *UndirectedMatrix) Nodes() []graph.Node {
if g.nodes != nil {
nodes := make([]graph.Node, len(g.nodes))
copy(nodes, g.nodes)
return nodes
}
r := g.mat.Symmetric()
nodes := make([]graph.Node, r)
for i := 0; i < r; i++ {
nodes[i] = Node(i)
}
return nodes
}
// Edges returns all the edges in the graph.
func (g *UndirectedMatrix) Edges() []graph.Edge {
var edges []graph.Edge
r, _ := g.mat.Dims()
for i := 0; i < r; i++ {
for j := i + 1; j < r; j++ {
if w := g.mat.At(i, j); !isSame(w, g.absent) {
edges = append(edges, WeightedEdge{F: g.Node(int64(i)), T: g.Node(int64(j)), W: w})
}
}
}
return edges
}
// From returns all nodes in g that can be reached directly from n.
func (g *UndirectedMatrix) From(id int64) []graph.Node {
if !g.has(id) {
return nil
}
var neighbors []graph.Node
r := g.mat.Symmetric()
for i := 0; i < r; i++ {
if int64(i) == id {
continue
}
// id is not greater than maximum int by this point.
if !isSame(g.mat.At(int(id), i), g.absent) {
neighbors = append(neighbors, g.Node(int64(i)))
}
}
return neighbors
}
// HasEdgeBetween returns whether an edge exists between nodes x and y.
func (g *UndirectedMatrix) HasEdgeBetween(uid, vid int64) bool {
if !g.has(uid) {
return false
}
if !g.has(vid) {
return false
}
// uid and vid are not greater than maximum int by this point.
return uid != vid && !isSame(g.mat.At(int(uid), int(vid)), g.absent)
}
// Edge returns the edge from u to v if such an edge exists and nil otherwise.
// The node v must be directly reachable from u as defined by the From method.
func (g *UndirectedMatrix) Edge(uid, vid int64) graph.Edge {
return g.WeightedEdgeBetween(uid, vid)
}
// WeightedEdge returns the weighted edge from u to v if such an edge exists and nil otherwise.
// The node v must be directly reachable from u as defined by the From method.
func (g *UndirectedMatrix) WeightedEdge(uid, vid int64) graph.WeightedEdge {
return g.WeightedEdgeBetween(uid, vid)
}
// EdgeBetween returns the edge between nodes x and y.
func (g *UndirectedMatrix) EdgeBetween(uid, vid int64) graph.Edge {
return g.WeightedEdgeBetween(uid, vid)
}
// WeightedEdgeBetween returns the weighted edge between nodes x and y.
func (g *UndirectedMatrix) WeightedEdgeBetween(uid, vid int64) graph.WeightedEdge {
if g.HasEdgeBetween(uid, vid) {
// uid and vid are not greater than maximum int by this point.
return WeightedEdge{F: g.Node(uid), T: g.Node(vid), W: g.mat.At(int(uid), int(vid))}
}
return nil
}
// Weight returns the weight for the edge between x and y if Edge(x, y) returns a non-nil Edge.
// If x and y are the same node or there is no joining edge between the two nodes the weight
// value returned is either the graph's absent or self value. Weight returns true if an edge
// exists between x and y or if x and y have the same ID, false otherwise.
func (g *UndirectedMatrix) Weight(xid, yid int64) (w float64, ok bool) {
if xid == yid {
return g.self, true
}
if g.has(xid) && g.has(yid) {
// xid and yid are not greater than maximum int by this point.
return g.mat.At(int(xid), int(yid)), true
}
return g.absent, false
}
// SetEdge sets e, an edge from one node to another with unit weight. If the ends of the edge are
// not in g or the edge is a self loop, SetEdge panics.
func (g *UndirectedMatrix) SetEdge(e graph.Edge) {
g.setWeightedEdge(e, 1)
}
// SetWeightedEdge sets e, an edge from one node to another. If the ends of the edge are not in g
// or the edge is a self loop, SetWeightedEdge panics.
func (g *UndirectedMatrix) SetWeightedEdge(e graph.WeightedEdge) {
g.setWeightedEdge(e, e.Weight())
}
func (g *UndirectedMatrix) setWeightedEdge(e graph.Edge, weight float64) {
fid := e.From().ID()
tid := e.To().ID()
if fid == tid {
panic("simple: set illegal edge")
}
if int64(int(fid)) != fid {
panic("simple: unavailable from node ID for dense graph")
}
if int64(int(tid)) != tid {
panic("simple: unavailable to node ID for dense graph")
}
// fid and tid are not greater than maximum int by this point.
g.mat.SetSym(int(fid), int(tid), weight)
}
// RemoveEdge removes the edge with the given end point IDs from the graph, leaving the terminal
// nodes. If the edge does not exist it is a no-op.
func (g *UndirectedMatrix) RemoveEdge(fid, tid int64) {
if !g.has(fid) {
return
}
if !g.has(tid) {
return
}
// fid and tid are not greater than maximum int by this point.
g.mat.SetSym(int(fid), int(tid), g.absent)
}
// Degree returns the degree of n in g.
func (g *UndirectedMatrix) Degree(id int64) int {
if !g.has(id) {
return 0
}
var deg int
r := g.mat.Symmetric()
for i := 0; i < r; i++ {
if int64(i) == id {
continue
}
// id is not greater than maximum int by this point.
if !isSame(g.mat.At(int(id), i), g.absent) {
deg++
}
}
return deg
}
// Matrix returns the mat.Matrix representation of the graph.
func (g *UndirectedMatrix) Matrix() mat.Matrix {
// Prevent alteration of dimensions of the returned matrix.
m := *g.mat
return &m
}
@@ -0,0 +1,222 @@
// Copyright ©2014 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package simple
import (
"fmt"
"gonum.org/v1/gonum/graph"
"gonum.org/v1/gonum/graph/internal/uid"
)
// DirectedGraph implements a generalized directed graph.
type DirectedGraph struct {
nodes map[int64]graph.Node
from map[int64]map[int64]graph.Edge
to map[int64]map[int64]graph.Edge
nodeIDs uid.Set
}
// NewDirectedGraph returns a DirectedGraph.
func NewDirectedGraph() *DirectedGraph {
return &DirectedGraph{
nodes: make(map[int64]graph.Node),
from: make(map[int64]map[int64]graph.Edge),
to: make(map[int64]map[int64]graph.Edge),
nodeIDs: uid.NewSet(),
}
}
// NewNode returns a new unique Node to be added to g. The Node's ID does
// not become valid in g until the Node is added to g.
func (g *DirectedGraph) NewNode() graph.Node {
if len(g.nodes) == 0 {
return Node(0)
}
if int64(len(g.nodes)) == uid.Max {
panic("simple: cannot allocate node: no slot")
}
return Node(g.nodeIDs.NewID())
}
// AddNode adds n to the graph. It panics if the added node ID matches an existing node ID.
func (g *DirectedGraph) AddNode(n graph.Node) {
if _, exists := g.nodes[n.ID()]; exists {
panic(fmt.Sprintf("simple: node ID collision: %d", n.ID()))
}
g.nodes[n.ID()] = n
g.from[n.ID()] = make(map[int64]graph.Edge)
g.to[n.ID()] = make(map[int64]graph.Edge)
g.nodeIDs.Use(n.ID())
}
// RemoveNode removes the node with the given ID from the graph, as well as any edges attached
// to it. If the node is not in the graph it is a no-op.
func (g *DirectedGraph) RemoveNode(id int64) {
if _, ok := g.nodes[id]; !ok {
return
}
delete(g.nodes, id)
for from := range g.from[id] {
delete(g.to[from], id)
}
delete(g.from, id)
for to := range g.to[id] {
delete(g.from[to], id)
}
delete(g.to, id)
g.nodeIDs.Release(id)
}
// NewEdge returns a new Edge from the source to the destination node.
func (g *DirectedGraph) NewEdge(from, to graph.Node) graph.Edge {
return &Edge{F: from, T: to}
}
// SetEdge adds e, an edge from one node to another. If the nodes do not exist, they are added.
// It will panic if the IDs of the e.From and e.To are equal.
func (g *DirectedGraph) SetEdge(e graph.Edge) {
var (
from = e.From()
fid = from.ID()
to = e.To()
tid = to.ID()
)
if fid == tid {
panic("simple: adding self edge")
}
if !g.Has(fid) {
g.AddNode(from)
}
if !g.Has(tid) {
g.AddNode(to)
}
g.from[fid][tid] = e
g.to[tid][fid] = e
}
// RemoveEdge removes the edge with the given end point IDs from the graph, leaving the terminal
// nodes. If the edge does not exist it is a no-op.
func (g *DirectedGraph) RemoveEdge(fid, tid int64) {
if _, ok := g.nodes[fid]; !ok {
return
}
if _, ok := g.nodes[tid]; !ok {
return
}
delete(g.from[fid], tid)
delete(g.to[tid], fid)
}
// Node returns the node in the graph with the given ID.
func (g *DirectedGraph) Node(id int64) graph.Node {
return g.nodes[id]
}
// Has returns whether the node exists within the graph.
func (g *DirectedGraph) Has(id int64) bool {
_, ok := g.nodes[id]
return ok
}
// Nodes returns all the nodes in the graph.
func (g *DirectedGraph) Nodes() []graph.Node {
if len(g.nodes) == 0 {
return nil
}
nodes := make([]graph.Node, len(g.nodes))
i := 0
for _, n := range g.nodes {
nodes[i] = n
i++
}
return nodes
}
// Edges returns all the edges in the graph.
func (g *DirectedGraph) Edges() []graph.Edge {
var edges []graph.Edge
for _, u := range g.nodes {
for _, e := range g.from[u.ID()] {
edges = append(edges, e)
}
}
return edges
}
// From returns all nodes in g that can be reached directly from n.
func (g *DirectedGraph) From(id int64) []graph.Node {
if _, ok := g.from[id]; !ok {
return nil
}
from := make([]graph.Node, len(g.from[id]))
i := 0
for vid := range g.from[id] {
from[i] = g.nodes[vid]
i++
}
return from
}
// To returns all nodes in g that can reach directly to n.
func (g *DirectedGraph) To(id int64) []graph.Node {
if _, ok := g.from[id]; !ok {
return nil
}
to := make([]graph.Node, len(g.to[id]))
i := 0
for uid := range g.to[id] {
to[i] = g.nodes[uid]
i++
}
return to
}
// HasEdgeBetween returns whether an edge exists between nodes x and y without
// considering direction.
func (g *DirectedGraph) HasEdgeBetween(xid, yid int64) bool {
if _, ok := g.from[xid][yid]; ok {
return true
}
_, ok := g.from[yid][xid]
return ok
}
// Edge returns the edge from u to v if such an edge exists and nil otherwise.
// The node v must be directly reachable from u as defined by the From method.
func (g *DirectedGraph) Edge(uid, vid int64) graph.Edge {
edge, ok := g.from[uid][vid]
if !ok {
return nil
}
return edge
}
// HasEdgeFromTo returns whether an edge exists in the graph from u to v.
func (g *DirectedGraph) HasEdgeFromTo(uid, vid int64) bool {
if _, ok := g.from[uid][vid]; !ok {
return false
}
return true
}
// Degree returns the in+out degree of n in g.
func (g *DirectedGraph) Degree(id int64) int {
if _, ok := g.nodes[id]; !ok {
return 0
}
return len(g.from[id]) + len(g.to[id])
}
@@ -0,0 +1,7 @@
// Copyright ©2017 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package simple provides a suite of simple graph implementations satisfying
// the gonum/graph interfaces.
package simple
@@ -0,0 +1,51 @@
// Copyright ©2014 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package simple
import (
"math"
"gonum.org/v1/gonum/graph"
)
// Node is a simple graph node.
type Node int64
// ID returns the ID number of the node.
func (n Node) ID() int64 {
return int64(n)
}
// Edge is a simple graph edge.
type Edge struct {
F, T graph.Node
}
// From returns the from-node of the edge.
func (e Edge) From() graph.Node { return e.F }
// To returns the to-node of the edge.
func (e Edge) To() graph.Node { return e.T }
// WeightedEdge is a simple weighted graph edge.
type WeightedEdge struct {
F, T graph.Node
W float64
}
// From returns the from-node of the edge.
func (e WeightedEdge) From() graph.Node { return e.F }
// To returns the to-node of the edge.
func (e WeightedEdge) To() graph.Node { return e.T }
// Weight returns the weight of the edge.
func (e WeightedEdge) Weight() float64 { return e.W }
// isSame returns whether two float64 values are the same where NaN values
// are equalable.
func isSame(a, b float64) bool {
return a == b || (math.IsNaN(a) && math.IsNaN(b))
}
@@ -0,0 +1,203 @@
// Copyright ©2014 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package simple
import (
"fmt"
"gonum.org/v1/gonum/graph"
"gonum.org/v1/gonum/graph/internal/uid"
)
// UndirectedGraph implements a generalized undirected graph.
type UndirectedGraph struct {
nodes map[int64]graph.Node
edges map[int64]map[int64]graph.Edge
nodeIDs uid.Set
}
// NewUndirectedGraph returns an UndirectedGraph.
func NewUndirectedGraph() *UndirectedGraph {
return &UndirectedGraph{
nodes: make(map[int64]graph.Node),
edges: make(map[int64]map[int64]graph.Edge),
nodeIDs: uid.NewSet(),
}
}
// NewNode returns a new unique Node to be added to g. The Node's ID does
// not become valid in g until the Node is added to g.
func (g *UndirectedGraph) NewNode() graph.Node {
if len(g.nodes) == 0 {
return Node(0)
}
if int64(len(g.nodes)) == uid.Max {
panic("simple: cannot allocate node: no slot")
}
return Node(g.nodeIDs.NewID())
}
// AddNode adds n to the graph. It panics if the added node ID matches an existing node ID.
func (g *UndirectedGraph) AddNode(n graph.Node) {
if _, exists := g.nodes[n.ID()]; exists {
panic(fmt.Sprintf("simple: node ID collision: %d", n.ID()))
}
g.nodes[n.ID()] = n
g.edges[n.ID()] = make(map[int64]graph.Edge)
g.nodeIDs.Use(n.ID())
}
// RemoveNode removes the node with the given ID from the graph, as well as any edges attached
// to it. If the node is not in the graph it is a no-op.
func (g *UndirectedGraph) RemoveNode(id int64) {
if _, ok := g.nodes[id]; !ok {
return
}
delete(g.nodes, id)
for from := range g.edges[id] {
delete(g.edges[from], id)
}
delete(g.edges, id)
g.nodeIDs.Release(id)
}
// NewEdge returns a new Edge from the source to the destination node.
func (g *UndirectedGraph) NewEdge(from, to graph.Node) graph.Edge {
return &Edge{F: from, T: to}
}
// SetEdge adds e, an edge from one node to another. If the nodes do not exist, they are added.
// It will panic if the IDs of the e.From and e.To are equal.
func (g *UndirectedGraph) SetEdge(e graph.Edge) {
var (
from = e.From()
fid = from.ID()
to = e.To()
tid = to.ID()
)
if fid == tid {
panic("simple: adding self edge")
}
if !g.Has(fid) {
g.AddNode(from)
}
if !g.Has(tid) {
g.AddNode(to)
}
g.edges[fid][tid] = e
g.edges[tid][fid] = e
}
// RemoveEdge removes the edge with the given end IDs from the graph, leaving the terminal nodes.
// If the edge does not exist it is a no-op.
func (g *UndirectedGraph) RemoveEdge(fid, tid int64) {
if _, ok := g.nodes[fid]; !ok {
return
}
if _, ok := g.nodes[tid]; !ok {
return
}
delete(g.edges[fid], tid)
delete(g.edges[tid], fid)
}
// Node returns the node in the graph with the given ID.
func (g *UndirectedGraph) Node(id int64) graph.Node {
return g.nodes[id]
}
// Has returns whether the node exists within the graph.
func (g *UndirectedGraph) Has(id int64) bool {
_, ok := g.nodes[id]
return ok
}
// Nodes returns all the nodes in the graph.
func (g *UndirectedGraph) Nodes() []graph.Node {
if len(g.nodes) == 0 {
return nil
}
nodes := make([]graph.Node, len(g.nodes))
i := 0
for _, n := range g.nodes {
nodes[i] = n
i++
}
return nodes
}
// Edges returns all the edges in the graph.
func (g *UndirectedGraph) Edges() []graph.Edge {
if len(g.edges) == 0 {
return nil
}
var edges []graph.Edge
seen := make(map[[2]int64]struct{})
for _, u := range g.edges {
for _, e := range u {
uid := e.From().ID()
vid := e.To().ID()
if _, ok := seen[[2]int64{uid, vid}]; ok {
continue
}
seen[[2]int64{uid, vid}] = struct{}{}
seen[[2]int64{vid, uid}] = struct{}{}
edges = append(edges, e)
}
}
return edges
}
// From returns all nodes in g that can be reached directly from n.
func (g *UndirectedGraph) From(id int64) []graph.Node {
if !g.Has(id) {
return nil
}
nodes := make([]graph.Node, len(g.edges[id]))
i := 0
for from := range g.edges[id] {
nodes[i] = g.nodes[from]
i++
}
return nodes
}
// HasEdgeBetween returns whether an edge exists between nodes x and y.
func (g *UndirectedGraph) HasEdgeBetween(xid, yid int64) bool {
_, ok := g.edges[xid][yid]
return ok
}
// Edge returns the edge from u to v if such an edge exists and nil otherwise.
// The node v must be directly reachable from u as defined by the From method.
func (g *UndirectedGraph) Edge(uid, vid int64) graph.Edge {
return g.EdgeBetween(uid, vid)
}
// EdgeBetween returns the edge between nodes x and y.
func (g *UndirectedGraph) EdgeBetween(xid, yid int64) graph.Edge {
edge, ok := g.edges[xid][yid]
if !ok {
return nil
}
return edge
}
// Degree returns the degree of n in g.
func (g *UndirectedGraph) Degree(id int64) int {
if _, ok := g.nodes[id]; !ok {
return 0
}
return len(g.edges[id])
}
@@ -0,0 +1,261 @@
// Copyright ©2014 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package simple
import (
"fmt"
"gonum.org/v1/gonum/graph"
"gonum.org/v1/gonum/graph/internal/uid"
)
// WeightedDirectedGraph implements a generalized weighted directed graph.
type WeightedDirectedGraph struct {
nodes map[int64]graph.Node
from map[int64]map[int64]graph.WeightedEdge
to map[int64]map[int64]graph.WeightedEdge
self, absent float64
nodeIDs uid.Set
}
// NewWeightedDirectedGraph returns a WeightedDirectedGraph with the specified self and absent
// edge weight values.
func NewWeightedDirectedGraph(self, absent float64) *WeightedDirectedGraph {
return &WeightedDirectedGraph{
nodes: make(map[int64]graph.Node),
from: make(map[int64]map[int64]graph.WeightedEdge),
to: make(map[int64]map[int64]graph.WeightedEdge),
self: self,
absent: absent,
nodeIDs: uid.NewSet(),
}
}
// NewNode returns a new unique Node to be added to g. The Node's ID does
// not become valid in g until the Node is added to g.
func (g *WeightedDirectedGraph) NewNode() graph.Node {
if len(g.nodes) == 0 {
return Node(0)
}
if int64(len(g.nodes)) == uid.Max {
panic("simple: cannot allocate node: no slot")
}
return Node(g.nodeIDs.NewID())
}
// AddNode adds n to the graph. It panics if the added node ID matches an existing node ID.
func (g *WeightedDirectedGraph) AddNode(n graph.Node) {
if _, exists := g.nodes[n.ID()]; exists {
panic(fmt.Sprintf("simple: node ID collision: %d", n.ID()))
}
g.nodes[n.ID()] = n
g.from[n.ID()] = make(map[int64]graph.WeightedEdge)
g.to[n.ID()] = make(map[int64]graph.WeightedEdge)
g.nodeIDs.Use(n.ID())
}
// RemoveNode removes the node with the given ID from the graph, as well as any edges attached
// to it. If the node is not in the graph it is a no-op.
func (g *WeightedDirectedGraph) RemoveNode(id int64) {
if _, ok := g.nodes[id]; !ok {
return
}
delete(g.nodes, id)
for from := range g.from[id] {
delete(g.to[from], id)
}
delete(g.from, id)
for to := range g.to[id] {
delete(g.from[to], id)
}
delete(g.to, id)
g.nodeIDs.Release(id)
}
// NewWeightedEdge returns a new weighted edge from the source to the destination node.
func (g *WeightedDirectedGraph) NewWeightedEdge(from, to graph.Node, weight float64) graph.WeightedEdge {
return &WeightedEdge{F: from, T: to, W: weight}
}
// SetWeightedEdge adds a weighted edge from one node to another. If the nodes do not exist, they are added.
// It will panic if the IDs of the e.From and e.To are equal.
func (g *WeightedDirectedGraph) SetWeightedEdge(e graph.WeightedEdge) {
var (
from = e.From()
fid = from.ID()
to = e.To()
tid = to.ID()
)
if fid == tid {
panic("simple: adding self edge")
}
if !g.Has(fid) {
g.AddNode(from)
}
if !g.Has(tid) {
g.AddNode(to)
}
g.from[fid][tid] = e
g.to[tid][fid] = e
}
// RemoveEdge removes the edge with the given end point IDs from the graph, leaving the terminal
// nodes. If the edge does not exist it is a no-op.
func (g *WeightedDirectedGraph) RemoveEdge(fid, tid int64) {
if _, ok := g.nodes[fid]; !ok {
return
}
if _, ok := g.nodes[tid]; !ok {
return
}
delete(g.from[fid], tid)
delete(g.to[tid], fid)
}
// Node returns the node in the graph with the given ID.
func (g *WeightedDirectedGraph) Node(id int64) graph.Node {
return g.nodes[id]
}
// Has returns whether the node exists within the graph.
func (g *WeightedDirectedGraph) Has(id int64) bool {
_, ok := g.nodes[id]
return ok
}
// Nodes returns all the nodes in the graph.
func (g *WeightedDirectedGraph) Nodes() []graph.Node {
if len(g.from) == 0 {
return nil
}
nodes := make([]graph.Node, len(g.nodes))
i := 0
for _, n := range g.nodes {
nodes[i] = n
i++
}
return nodes
}
// Edges returns all the edges in the graph.
func (g *WeightedDirectedGraph) Edges() []graph.Edge {
var edges []graph.Edge
for _, u := range g.nodes {
for _, e := range g.from[u.ID()] {
edges = append(edges, e)
}
}
return edges
}
// WeightedEdges returns all the weighted edges in the graph.
func (g *WeightedDirectedGraph) WeightedEdges() []graph.WeightedEdge {
var edges []graph.WeightedEdge
for _, u := range g.nodes {
for _, e := range g.from[u.ID()] {
edges = append(edges, e)
}
}
return edges
}
// From returns all nodes in g that can be reached directly from n.
func (g *WeightedDirectedGraph) From(id int64) []graph.Node {
if _, ok := g.from[id]; !ok {
return nil
}
from := make([]graph.Node, len(g.from[id]))
i := 0
for vid := range g.from[id] {
from[i] = g.nodes[vid]
i++
}
return from
}
// To returns all nodes in g that can reach directly to n.
func (g *WeightedDirectedGraph) To(id int64) []graph.Node {
if _, ok := g.from[id]; !ok {
return nil
}
to := make([]graph.Node, len(g.to[id]))
i := 0
for uid := range g.to[id] {
to[i] = g.nodes[uid]
i++
}
return to
}
// HasEdgeBetween returns whether an edge exists between nodes x and y without
// considering direction.
func (g *WeightedDirectedGraph) HasEdgeBetween(xid, yid int64) bool {
if _, ok := g.from[xid][yid]; ok {
return true
}
_, ok := g.from[yid][xid]
return ok
}
// Edge returns the edge from u to v if such an edge exists and nil otherwise.
// The node v must be directly reachable from u as defined by the From method.
func (g *WeightedDirectedGraph) Edge(uid, vid int64) graph.Edge {
return g.WeightedEdge(uid, vid)
}
// WeightedEdge returns the weighted edge from u to v if such an edge exists and nil otherwise.
// The node v must be directly reachable from u as defined by the From method.
func (g *WeightedDirectedGraph) WeightedEdge(uid, vid int64) graph.WeightedEdge {
edge, ok := g.from[uid][vid]
if !ok {
return nil
}
return edge
}
// HasEdgeFromTo returns whether an edge exists in the graph from u to v.
func (g *WeightedDirectedGraph) HasEdgeFromTo(uid, vid int64) bool {
if _, ok := g.from[uid][vid]; !ok {
return false
}
return true
}
// Weight returns the weight for the edge between x and y if Edge(x, y) returns a non-nil Edge.
// If x and y are the same node or there is no joining edge between the two nodes the weight
// value returned is either the graph's absent or self value. Weight returns true if an edge
// exists between x and y or if x and y have the same ID, false otherwise.
func (g *WeightedDirectedGraph) Weight(xid, yid int64) (w float64, ok bool) {
if xid == yid {
return g.self, true
}
if to, ok := g.from[xid]; ok {
if e, ok := to[yid]; ok {
return e.Weight(), true
}
}
return g.absent, false
}
// Degree returns the in+out degree of n in g.
func (g *WeightedDirectedGraph) Degree(id int64) int {
if _, ok := g.nodes[id]; !ok {
return 0
}
return len(g.from[id]) + len(g.to[id])
}
@@ -0,0 +1,255 @@
// Copyright ©2014 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package simple
import (
"fmt"
"gonum.org/v1/gonum/graph"
"gonum.org/v1/gonum/graph/internal/uid"
)
// WeightedUndirectedGraph implements a generalized weighted undirected graph.
type WeightedUndirectedGraph struct {
nodes map[int64]graph.Node
edges map[int64]map[int64]graph.WeightedEdge
self, absent float64
nodeIDs uid.Set
}
// NewWeightedUndirectedGraph returns an WeightedUndirectedGraph with the specified self and absent
// edge weight values.
func NewWeightedUndirectedGraph(self, absent float64) *WeightedUndirectedGraph {
return &WeightedUndirectedGraph{
nodes: make(map[int64]graph.Node),
edges: make(map[int64]map[int64]graph.WeightedEdge),
self: self,
absent: absent,
nodeIDs: uid.NewSet(),
}
}
// NewNode returns a new unique Node to be added to g. The Node's ID does
// not become valid in g until the Node is added to g.
func (g *WeightedUndirectedGraph) NewNode() graph.Node {
if len(g.nodes) == 0 {
return Node(0)
}
if int64(len(g.nodes)) == uid.Max {
panic("simple: cannot allocate node: no slot")
}
return Node(g.nodeIDs.NewID())
}
// AddNode adds n to the graph. It panics if the added node ID matches an existing node ID.
func (g *WeightedUndirectedGraph) AddNode(n graph.Node) {
if _, exists := g.nodes[n.ID()]; exists {
panic(fmt.Sprintf("simple: node ID collision: %d", n.ID()))
}
g.nodes[n.ID()] = n
g.edges[n.ID()] = make(map[int64]graph.WeightedEdge)
g.nodeIDs.Use(n.ID())
}
// RemoveNode removes the node with the given ID from the graph, as well as any edges attached
// to it. If the node is not in the graph it is a no-op.
func (g *WeightedUndirectedGraph) RemoveNode(id int64) {
if _, ok := g.nodes[id]; !ok {
return
}
delete(g.nodes, id)
for from := range g.edges[id] {
delete(g.edges[from], id)
}
delete(g.edges, id)
g.nodeIDs.Release(id)
}
// NewWeightedEdge returns a new weighted edge from the source to the destination node.
func (g *WeightedUndirectedGraph) NewWeightedEdge(from, to graph.Node, weight float64) graph.WeightedEdge {
return &WeightedEdge{F: from, T: to, W: weight}
}
// SetWeightedEdge adds a weighted edge from one node to another. If the nodes do not exist, they are added.
// It will panic if the IDs of the e.From and e.To are equal.
func (g *WeightedUndirectedGraph) SetWeightedEdge(e graph.WeightedEdge) {
var (
from = e.From()
fid = from.ID()
to = e.To()
tid = to.ID()
)
if fid == tid {
panic("simple: adding self edge")
}
if !g.Has(fid) {
g.AddNode(from)
}
if !g.Has(tid) {
g.AddNode(to)
}
g.edges[fid][tid] = e
g.edges[tid][fid] = e
}
// RemoveEdge removes the edge with the given end point IDs from the graph, leaving the terminal
// nodes. If the edge does not exist it is a no-op.
func (g *WeightedUndirectedGraph) RemoveEdge(fid, tid int64) {
if _, ok := g.nodes[fid]; !ok {
return
}
if _, ok := g.nodes[tid]; !ok {
return
}
delete(g.edges[fid], tid)
delete(g.edges[tid], fid)
}
// Node returns the node in the graph with the given ID.
func (g *WeightedUndirectedGraph) Node(id int64) graph.Node {
return g.nodes[id]
}
// Has returns whether the node exists within the graph.
func (g *WeightedUndirectedGraph) Has(id int64) bool {
_, ok := g.nodes[id]
return ok
}
// Nodes returns all the nodes in the graph.
func (g *WeightedUndirectedGraph) Nodes() []graph.Node {
if len(g.nodes) == 0 {
return nil
}
nodes := make([]graph.Node, len(g.nodes))
i := 0
for _, n := range g.nodes {
nodes[i] = n
i++
}
return nodes
}
// Edges returns all the edges in the graph.
func (g *WeightedUndirectedGraph) Edges() []graph.Edge {
if len(g.edges) == 0 {
return nil
}
var edges []graph.Edge
seen := make(map[[2]int64]struct{})
for _, u := range g.edges {
for _, e := range u {
uid := e.From().ID()
vid := e.To().ID()
if _, ok := seen[[2]int64{uid, vid}]; ok {
continue
}
seen[[2]int64{uid, vid}] = struct{}{}
seen[[2]int64{vid, uid}] = struct{}{}
edges = append(edges, e)
}
}
return edges
}
// WeightedEdges returns all the weighted edges in the graph.
func (g *WeightedUndirectedGraph) WeightedEdges() []graph.WeightedEdge {
var edges []graph.WeightedEdge
seen := make(map[[2]int64]struct{})
for _, u := range g.edges {
for _, e := range u {
uid := e.From().ID()
vid := e.To().ID()
if _, ok := seen[[2]int64{uid, vid}]; ok {
continue
}
seen[[2]int64{uid, vid}] = struct{}{}
seen[[2]int64{vid, uid}] = struct{}{}
edges = append(edges, e)
}
}
return edges
}
// From returns all nodes in g that can be reached directly from n.
func (g *WeightedUndirectedGraph) From(id int64) []graph.Node {
if !g.Has(id) {
return nil
}
nodes := make([]graph.Node, len(g.edges[id]))
i := 0
for from := range g.edges[id] {
nodes[i] = g.nodes[from]
i++
}
return nodes
}
// HasEdgeBetween returns whether an edge exists between nodes x and y.
func (g *WeightedUndirectedGraph) HasEdgeBetween(xid, yid int64) bool {
_, ok := g.edges[xid][yid]
return ok
}
// Edge returns the edge from u to v if such an edge exists and nil otherwise.
// The node v must be directly reachable from u as defined by the From method.
func (g *WeightedUndirectedGraph) Edge(uid, vid int64) graph.Edge {
return g.WeightedEdgeBetween(uid, vid)
}
// WeightedEdge returns the weighted edge from u to v if such an edge exists and nil otherwise.
// The node v must be directly reachable from u as defined by the From method.
func (g *WeightedUndirectedGraph) WeightedEdge(uid, vid int64) graph.WeightedEdge {
return g.WeightedEdgeBetween(uid, vid)
}
// EdgeBetween returns the edge between nodes x and y.
func (g *WeightedUndirectedGraph) EdgeBetween(xid, yid int64) graph.Edge {
return g.WeightedEdgeBetween(xid, yid)
}
// WeightedEdgeBetween returns the weighted edge between nodes x and y.
func (g *WeightedUndirectedGraph) WeightedEdgeBetween(xid, yid int64) graph.WeightedEdge {
edge, ok := g.edges[xid][yid]
if !ok {
return nil
}
return edge
}
// Weight returns the weight for the edge between x and y if Edge(x, y) returns a non-nil Edge.
// If x and y are the same node or there is no joining edge between the two nodes the weight
// value returned is either the graph's absent or self value. Weight returns true if an edge
// exists between x and y or if x and y have the same ID, false otherwise.
func (g *WeightedUndirectedGraph) Weight(xid, yid int64) (w float64, ok bool) {
if xid == yid {
return g.self, true
}
if n, ok := g.edges[xid]; ok {
if e, ok := n[yid]; ok {
return e.Weight(), true
}
}
return g.absent, false
}
// Degree returns the degree of n in g.
func (g *WeightedUndirectedGraph) Degree(id int64) int {
if _, ok := g.nodes[id]; !ok {
return 0
}
return len(g.edges[id])
}
@@ -0,0 +1,250 @@
// Copyright ©2015 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package topo
import (
"gonum.org/v1/gonum/graph"
"gonum.org/v1/gonum/graph/internal/ordered"
"gonum.org/v1/gonum/graph/internal/set"
)
// DegeneracyOrdering returns the degeneracy ordering and the k-cores of
// the undirected graph g.
func DegeneracyOrdering(g graph.Undirected) (order []graph.Node, cores [][]graph.Node) {
order, offsets := degeneracyOrdering(g)
ordered.Reverse(order)
cores = make([][]graph.Node, len(offsets))
offset := len(order)
for i, n := range offsets {
cores[i] = order[offset-n : offset]
offset -= n
}
return order, cores
}
// KCore returns the k-core of the undirected graph g with nodes in an
// optimal ordering for the coloring number.
func KCore(k int, g graph.Undirected) []graph.Node {
order, offsets := degeneracyOrdering(g)
var offset int
for _, n := range offsets[:k] {
offset += n
}
core := make([]graph.Node, len(order)-offset)
copy(core, order[offset:])
return core
}
// degeneracyOrdering is the common code for DegeneracyOrdering and KCore. It
// returns l, the nodes of g in optimal ordering for coloring number and
// s, a set of relative offsets into l for each k-core, where k is an index
// into s.
func degeneracyOrdering(g graph.Undirected) (l []graph.Node, s []int) {
nodes := g.Nodes()
// The algorithm used here is essentially as described at
// http://en.wikipedia.org/w/index.php?title=Degeneracy_%28graph_theory%29&oldid=640308710
// Initialize an output list L in return parameters.
// Compute a number d_v for each vertex v in G,
// the number of neighbors of v that are not already in L.
// Initially, these numbers are just the degrees of the vertices.
dv := make(map[int64]int, len(nodes))
var (
maxDegree int
neighbours = make(map[int64][]graph.Node)
)
for _, n := range nodes {
id := n.ID()
adj := g.From(id)
neighbours[id] = adj
dv[id] = len(adj)
if len(adj) > maxDegree {
maxDegree = len(adj)
}
}
// Initialize an array D such that D[i] contains a list of the
// vertices v that are not already in L for which d_v = i.
d := make([][]graph.Node, maxDegree+1)
for _, n := range nodes {
deg := dv[n.ID()]
d[deg] = append(d[deg], n)
}
// Initialize k to 0.
k := 0
// Repeat n times:
s = []int{0}
for range nodes {
// Scan the array cells D[0], D[1], ... until
// finding an i for which D[i] is nonempty.
var (
i int
di []graph.Node
)
for i, di = range d {
if len(di) != 0 {
break
}
}
// Set k to max(k,i).
if i > k {
k = i
s = append(s, make([]int, k-len(s)+1)...)
}
// Select a vertex v from D[i]. Add v to the
// beginning of L and remove it from D[i].
var v graph.Node
v, d[i] = di[len(di)-1], di[:len(di)-1]
l = append(l, v)
s[k]++
delete(dv, v.ID())
// For each neighbor w of v not already in L,
// subtract one from d_w and move w to the
// cell of D corresponding to the new value of d_w.
for _, w := range neighbours[v.ID()] {
dw, ok := dv[w.ID()]
if !ok {
continue
}
for i, n := range d[dw] {
if n.ID() == w.ID() {
d[dw][i], d[dw] = d[dw][len(d[dw])-1], d[dw][:len(d[dw])-1]
dw--
d[dw] = append(d[dw], w)
break
}
}
dv[w.ID()] = dw
}
}
return l, s
}
// BronKerbosch returns the set of maximal cliques of the undirected graph g.
func BronKerbosch(g graph.Undirected) [][]graph.Node {
nodes := g.Nodes()
// The algorithm used here is essentially BronKerbosch3 as described at
// http://en.wikipedia.org/w/index.php?title=Bron%E2%80%93Kerbosch_algorithm&oldid=656805858
p := make(set.Nodes, len(nodes))
for _, n := range nodes {
p.Add(n)
}
x := make(set.Nodes)
var bk bronKerbosch
order, _ := degeneracyOrdering(g)
ordered.Reverse(order)
for _, v := range order {
neighbours := g.From(v.ID())
nv := make(set.Nodes, len(neighbours))
for _, n := range neighbours {
nv.Add(n)
}
bk.maximalCliquePivot(g, []graph.Node{v}, make(set.Nodes).Intersect(p, nv), make(set.Nodes).Intersect(x, nv))
p.Remove(v)
x.Add(v)
}
return bk
}
type bronKerbosch [][]graph.Node
func (bk *bronKerbosch) maximalCliquePivot(g graph.Undirected, r []graph.Node, p, x set.Nodes) {
if len(p) == 0 && len(x) == 0 {
*bk = append(*bk, r)
return
}
neighbours := bk.choosePivotFrom(g, p, x)
nu := make(set.Nodes, len(neighbours))
for _, n := range neighbours {
nu.Add(n)
}
for _, v := range p {
if nu.Has(v) {
continue
}
vid := v.ID()
neighbours := g.From(vid)
nv := make(set.Nodes, len(neighbours))
for _, n := range neighbours {
nv.Add(n)
}
var found bool
for _, n := range r {
if n.ID() == vid {
found = true
break
}
}
var sr []graph.Node
if !found {
sr = append(r[:len(r):len(r)], v)
}
bk.maximalCliquePivot(g, sr, make(set.Nodes).Intersect(p, nv), make(set.Nodes).Intersect(x, nv))
p.Remove(v)
x.Add(v)
}
}
func (*bronKerbosch) choosePivotFrom(g graph.Undirected, p, x set.Nodes) (neighbors []graph.Node) {
// TODO(kortschak): Investigate the impact of pivot choice that maximises
// |p ⋂ neighbours(u)| as a function of input size. Until then, leave as
// compile time option.
if !tomitaTanakaTakahashi {
for _, n := range p {
return g.From(n.ID())
}
for _, n := range x {
return g.From(n.ID())
}
panic("bronKerbosch: empty set")
}
var (
max = -1
pivot graph.Node
)
maxNeighbors := func(s set.Nodes) {
outer:
for _, u := range s {
nb := g.From(u.ID())
c := len(nb)
if c <= max {
continue
}
for n := range nb {
if _, ok := p[int64(n)]; ok {
continue
}
c--
if c <= max {
continue outer
}
}
max = c
pivot = u
neighbors = nb
}
}
maxNeighbors(p)
maxNeighbors(x)
if pivot == nil {
panic("bronKerbosch: empty set")
}
return neighbors
}
@@ -0,0 +1,106 @@
// Copyright ©2017 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package topo
import (
"sort"
"gonum.org/v1/gonum/graph"
"gonum.org/v1/gonum/graph/internal/ordered"
"gonum.org/v1/gonum/graph/internal/set"
)
// Builder is a pure topological graph construction type.
type Builder interface {
AddNode(graph.Node)
SetEdge(graph.Edge)
}
// CliqueGraph builds the clique graph of g in dst using Clique and CliqueGraphEdge
// nodes and edges. The nodes returned by calls to Nodes on the nodes and edges of
// the constructed graph are the cliques and the common nodes between cliques
// respectively. The dst graph is not cleared.
func CliqueGraph(dst Builder, g graph.Undirected) {
cliques := BronKerbosch(g)
// Construct a consistent view of cliques in g. Sorting costs
// us a little, but not as much as the cliques themselves.
for _, c := range cliques {
sort.Sort(ordered.ByID(c))
}
sort.Sort(ordered.BySliceIDs(cliques))
cliqueNodes := make(cliqueNodeSets, len(cliques))
for id, c := range cliques {
s := make(set.Nodes, len(c))
for _, n := range c {
s.Add(n)
}
ns := &nodeSet{Clique: Clique{id: int64(id), nodes: c}, nodes: s}
dst.AddNode(ns.Clique)
for _, n := range c {
nid := n.ID()
cliqueNodes[nid] = append(cliqueNodes[nid], ns)
}
}
for _, cliques := range cliqueNodes {
for i, uc := range cliques {
for _, vc := range cliques[i+1:] {
// Retain the nodes that contribute to the
// edge between the cliques.
var edgeNodes []graph.Node
switch 1 {
case len(uc.Clique.nodes):
edgeNodes = []graph.Node{uc.Clique.nodes[0]}
case len(vc.Clique.nodes):
edgeNodes = []graph.Node{vc.Clique.nodes[0]}
default:
for _, n := range make(set.Nodes).Intersect(uc.nodes, vc.nodes) {
edgeNodes = append(edgeNodes, n)
}
sort.Sort(ordered.ByID(edgeNodes))
}
dst.SetEdge(CliqueGraphEdge{from: uc.Clique, to: vc.Clique, nodes: edgeNodes})
}
}
}
}
type cliqueNodeSets map[int64][]*nodeSet
type nodeSet struct {
Clique
nodes set.Nodes
}
// Clique is a node in a clique graph.
type Clique struct {
id int64
nodes []graph.Node
}
// ID returns the node ID.
func (n Clique) ID() int64 { return n.id }
// Nodes returns the nodes in the clique.
func (n Clique) Nodes() []graph.Node { return n.nodes }
// CliqueGraphEdge is an edge in a clique graph.
type CliqueGraphEdge struct {
from, to Clique
nodes []graph.Node
}
// From returns the from node of the edge.
func (e CliqueGraphEdge) From() graph.Node { return e.from }
// To returns the to node of the edge.
func (e CliqueGraphEdge) To() graph.Node { return e.to }
// Nodes returns the common nodes in the cliques of the underlying graph
// corresponding to the from and to nodes in the clique graph.
func (e CliqueGraphEdge) Nodes() []graph.Node { return e.nodes }
@@ -0,0 +1,6 @@
// Copyright ©2017 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package topo provides graph topology analysis functions.
package topo
@@ -0,0 +1,281 @@
// Copyright ©2015 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package topo
import (
"sort"
"gonum.org/v1/gonum/graph"
"gonum.org/v1/gonum/graph/internal/ordered"
"gonum.org/v1/gonum/graph/internal/set"
)
// johnson implements Johnson's "Finding all the elementary
// circuits of a directed graph" algorithm. SIAM J. Comput. 4(1):1975.
//
// Comments in the johnson methods are kept in sync with the comments
// and labels from the paper.
type johnson struct {
adjacent johnsonGraph // SCC adjacency list.
b []set.Ints // Johnson's "B-list".
blocked []bool
s int
stack []graph.Node
result [][]graph.Node
}
// DirectedCyclesIn returns the set of elementary cycles in the graph g.
func DirectedCyclesIn(g graph.Directed) [][]graph.Node {
jg := johnsonGraphFrom(g)
j := johnson{
adjacent: jg,
b: make([]set.Ints, len(jg.orig)),
blocked: make([]bool, len(jg.orig)),
}
// len(j.nodes) is the order of g.
for j.s < len(j.adjacent.orig)-1 {
// We use the previous SCC adjacency to reduce the work needed.
sccs := TarjanSCC(j.adjacent.subgraph(j.s))
// A_k = adjacency structure of strong component K with least
// vertex in subgraph of G induced by {s, s+1, ... ,n}.
j.adjacent = j.adjacent.sccSubGraph(sccs, 2) // Only allow SCCs with >= 2 vertices.
if j.adjacent.order() == 0 {
break
}
// s = least vertex in V_k
if s := j.adjacent.leastVertexIndex(); s < j.s {
j.s = s
}
for i, v := range j.adjacent.orig {
if !j.adjacent.nodes.Has(v.ID()) {
continue
}
if len(j.adjacent.succ[v.ID()]) > 0 {
j.blocked[i] = false
j.b[i] = make(set.Ints)
}
}
//L3:
_ = j.circuit(j.s)
j.s++
}
return j.result
}
// circuit is the CIRCUIT sub-procedure in the paper.
func (j *johnson) circuit(v int) bool {
f := false
n := j.adjacent.orig[v]
j.stack = append(j.stack, n)
j.blocked[v] = true
//L1:
for w := range j.adjacent.succ[n.ID()] {
w := j.adjacent.indexOf(w)
if w == j.s {
// Output circuit composed of stack followed by s.
r := make([]graph.Node, len(j.stack)+1)
copy(r, j.stack)
r[len(r)-1] = j.adjacent.orig[j.s]
j.result = append(j.result, r)
f = true
} else if !j.blocked[w] {
if j.circuit(w) {
f = true
}
}
}
//L2:
if f {
j.unblock(v)
} else {
for w := range j.adjacent.succ[n.ID()] {
j.b[j.adjacent.indexOf(w)].Add(v)
}
}
j.stack = j.stack[:len(j.stack)-1]
return f
}
// unblock is the UNBLOCK sub-procedure in the paper.
func (j *johnson) unblock(u int) {
j.blocked[u] = false
for w := range j.b[u] {
j.b[u].Remove(w)
if j.blocked[w] {
j.unblock(w)
}
}
}
// johnsonGraph is an edge list representation of a graph with helpers
// necessary for Johnson's algorithm
type johnsonGraph struct {
// Keep the original graph nodes and a
// look-up to into the non-sparse
// collection of potentially sparse IDs.
orig []graph.Node
index map[int64]int
nodes set.Int64s
succ map[int64]set.Int64s
}
// johnsonGraphFrom returns a deep copy of the graph g.
func johnsonGraphFrom(g graph.Directed) johnsonGraph {
nodes := g.Nodes()
sort.Sort(ordered.ByID(nodes))
c := johnsonGraph{
orig: nodes,
index: make(map[int64]int, len(nodes)),
nodes: make(set.Int64s, len(nodes)),
succ: make(map[int64]set.Int64s),
}
for i, u := range nodes {
uid := u.ID()
c.index[uid] = i
for _, v := range g.From(uid) {
if c.succ[uid] == nil {
c.succ[uid] = make(set.Int64s)
c.nodes.Add(uid)
}
c.nodes.Add(v.ID())
c.succ[uid].Add(v.ID())
}
}
return c
}
// order returns the order of the graph.
func (g johnsonGraph) order() int { return g.nodes.Count() }
// indexOf returns the index of the retained node for the given node ID.
func (g johnsonGraph) indexOf(id int64) int {
return g.index[id]
}
// leastVertexIndex returns the index into orig of the least vertex.
func (g johnsonGraph) leastVertexIndex() int {
for _, v := range g.orig {
if g.nodes.Has(v.ID()) {
return g.indexOf(v.ID())
}
}
panic("johnsonCycles: empty set")
}
// subgraph returns a subgraph of g induced by {s, s+1, ... , n}. The
// subgraph is destructively generated in g.
func (g johnsonGraph) subgraph(s int) johnsonGraph {
sn := g.orig[s].ID()
for u, e := range g.succ {
if u < sn {
g.nodes.Remove(u)
delete(g.succ, u)
continue
}
for v := range e {
if v < sn {
g.succ[u].Remove(v)
}
}
}
return g
}
// sccSubGraph returns the graph of the tarjan's strongly connected
// components with each SCC containing at least min vertices.
// sccSubGraph returns nil if there is no SCC with at least min
// members.
func (g johnsonGraph) sccSubGraph(sccs [][]graph.Node, min int) johnsonGraph {
if len(g.nodes) == 0 {
g.nodes = nil
g.succ = nil
return g
}
sub := johnsonGraph{
orig: g.orig,
index: g.index,
nodes: make(set.Int64s),
succ: make(map[int64]set.Int64s),
}
var n int
for _, scc := range sccs {
if len(scc) < min {
continue
}
n++
for _, u := range scc {
for _, v := range scc {
if _, ok := g.succ[u.ID()][v.ID()]; ok {
if sub.succ[u.ID()] == nil {
sub.succ[u.ID()] = make(set.Int64s)
sub.nodes.Add(u.ID())
}
sub.nodes.Add(v.ID())
sub.succ[u.ID()].Add(v.ID())
}
}
}
}
if n == 0 {
g.nodes = nil
g.succ = nil
return g
}
return sub
}
// Nodes is required to satisfy Tarjan.
func (g johnsonGraph) Nodes() []graph.Node {
n := make([]graph.Node, 0, len(g.nodes))
for id := range g.nodes {
n = append(n, johnsonGraphNode(id))
}
return n
}
// Successors is required to satisfy Tarjan.
func (g johnsonGraph) From(id int64) []graph.Node {
adj := g.succ[id]
if len(adj) == 0 {
return nil
}
succ := make([]graph.Node, 0, len(adj))
for id := range adj {
succ = append(succ, johnsonGraphNode(id))
}
return succ
}
func (johnsonGraph) Has(int64) bool {
panic("topo: unintended use of johnsonGraph")
}
func (johnsonGraph) HasEdgeBetween(_, _ int64) bool {
panic("topo: unintended use of johnsonGraph")
}
func (johnsonGraph) Edge(_, _ int64) graph.Edge {
panic("topo: unintended use of johnsonGraph")
}
func (johnsonGraph) HasEdgeFromTo(_, _ int64) bool {
panic("topo: unintended use of johnsonGraph")
}
func (johnsonGraph) To(int64) []graph.Node {
panic("topo: unintended use of johnsonGraph")
}
type johnsonGraphNode int64
func (n johnsonGraphNode) ID() int64 { return int64(n) }
@@ -0,0 +1,9 @@
// Copyright ©2015 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build !tomita
package topo
const tomitaTanakaTakahashi = false
@@ -0,0 +1,81 @@
// Copyright ©2017 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package topo
import (
"gonum.org/v1/gonum/graph"
"gonum.org/v1/gonum/graph/internal/linear"
"gonum.org/v1/gonum/graph/internal/set"
)
// UndirectedCyclesIn returns a set of cycles that forms a cycle basis in the graph g.
// Any cycle in g can be constructed as a symmetric difference of its elements.
func UndirectedCyclesIn(g graph.Undirected) [][]graph.Node {
// From "An algorithm for finding a fundamental set of cycles of a graph"
// https://doi.org/10.1145/363219.363232
var cycles [][]graph.Node
done := make(set.Int64s)
var tree linear.NodeStack
for _, n := range g.Nodes() {
id := n.ID()
if done.Has(id) {
continue
}
done.Add(id)
tree = tree[:0]
tree.Push(n)
from := sets{id: set.Int64s{}}
to := map[int64]graph.Node{id: n}
for tree.Len() != 0 {
u := tree.Pop()
uid := u.ID()
adj := from[uid]
for _, v := range g.From(uid) {
vid := v.ID()
switch {
case uid == vid:
cycles = append(cycles, []graph.Node{u})
case !from.has(vid):
done.Add(vid)
to[vid] = u
tree.Push(v)
from.add(uid, vid)
case !adj.Has(vid):
c := []graph.Node{v, u}
adj := from[vid]
p := to[uid]
for !adj.Has(p.ID()) {
c = append(c, p)
p = to[p.ID()]
}
c = append(c, p, c[0])
cycles = append(cycles, c)
adj.Add(uid)
}
}
}
}
return cycles
}
type sets map[int64]set.Int64s
func (s sets) add(uid, vid int64) {
e, ok := s[vid]
if !ok {
e = make(set.Int64s)
s[vid] = e
}
e.Add(uid)
}
func (s sets) has(uid int64) bool {
_, ok := s[uid]
return ok
}
@@ -0,0 +1,197 @@
// Copyright ©2015 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package topo
import (
"fmt"
"sort"
"gonum.org/v1/gonum/graph"
"gonum.org/v1/gonum/graph/internal/ordered"
"gonum.org/v1/gonum/graph/internal/set"
)
// Unorderable is an error containing sets of unorderable graph.Nodes.
type Unorderable [][]graph.Node
// Error satisfies the error interface.
func (e Unorderable) Error() string {
const maxNodes = 10
var n int
for _, c := range e {
n += len(c)
}
if n > maxNodes {
// Don't return errors that are too long.
return fmt.Sprintf("topo: no topological ordering: %d nodes in %d cyclic components", n, len(e))
}
return fmt.Sprintf("topo: no topological ordering: cyclic components: %v", [][]graph.Node(e))
}
func lexical(nodes []graph.Node) { sort.Sort(ordered.ByID(nodes)) }
// Sort performs a topological sort of the directed graph g returning the 'from' to 'to'
// sort order. If a topological ordering is not possible, an Unorderable error is returned
// listing cyclic components in g with each cyclic component's members sorted by ID. When
// an Unorderable error is returned, each cyclic component's topological position within
// the sorted nodes is marked with a nil graph.Node.
func Sort(g graph.Directed) (sorted []graph.Node, err error) {
sccs := TarjanSCC(g)
return sortedFrom(sccs, lexical)
}
// SortStabilized performs a topological sort of the directed graph g returning the 'from'
// to 'to' sort order, or the order defined by the in place order sort function where there
// is no unambiguous topological ordering. If a topological ordering is not possible, an
// Unorderable error is returned listing cyclic components in g with each cyclic component's
// members sorted by the provided order function. If order is nil, nodes are ordered lexically
// by node ID. When an Unorderable error is returned, each cyclic component's topological
// position within the sorted nodes is marked with a nil graph.Node.
func SortStabilized(g graph.Directed, order func([]graph.Node)) (sorted []graph.Node, err error) {
if order == nil {
order = lexical
}
sccs := tarjanSCCstabilized(g, order)
return sortedFrom(sccs, order)
}
func sortedFrom(sccs [][]graph.Node, order func([]graph.Node)) ([]graph.Node, error) {
sorted := make([]graph.Node, 0, len(sccs))
var sc Unorderable
for _, s := range sccs {
if len(s) != 1 {
order(s)
sc = append(sc, s)
sorted = append(sorted, nil)
continue
}
sorted = append(sorted, s[0])
}
var err error
if sc != nil {
for i, j := 0, len(sc)-1; i < j; i, j = i+1, j-1 {
sc[i], sc[j] = sc[j], sc[i]
}
err = sc
}
ordered.Reverse(sorted)
return sorted, err
}
// TarjanSCC returns the strongly connected components of the graph g using Tarjan's algorithm.
//
// A strongly connected component of a graph is a set of vertices where it's possible to reach any
// vertex in the set from any other (meaning there's a cycle between them.)
//
// Generally speaking, a directed graph where the number of strongly connected components is equal
// to the number of nodes is acyclic, unless you count reflexive edges as a cycle (which requires
// only a little extra testing.)
//
func TarjanSCC(g graph.Directed) [][]graph.Node {
return tarjanSCCstabilized(g, nil)
}
func tarjanSCCstabilized(g graph.Directed, order func([]graph.Node)) [][]graph.Node {
nodes := g.Nodes()
var succ func(id int64) []graph.Node
if order == nil {
succ = g.From
} else {
order(nodes)
ordered.Reverse(nodes)
succ = func(id int64) []graph.Node {
to := g.From(id)
order(to)
ordered.Reverse(to)
return to
}
}
t := tarjan{
succ: succ,
indexTable: make(map[int64]int, len(nodes)),
lowLink: make(map[int64]int, len(nodes)),
onStack: make(set.Int64s),
}
for _, v := range nodes {
if t.indexTable[v.ID()] == 0 {
t.strongconnect(v)
}
}
return t.sccs
}
// tarjan implements Tarjan's strongly connected component finding
// algorithm. The implementation is from the pseudocode at
//
// http://en.wikipedia.org/wiki/Tarjan%27s_strongly_connected_components_algorithm?oldid=642744644
//
type tarjan struct {
succ func(id int64) []graph.Node
index int
indexTable map[int64]int
lowLink map[int64]int
onStack set.Int64s
stack []graph.Node
sccs [][]graph.Node
}
// strongconnect is the strongconnect function described in the
// wikipedia article.
func (t *tarjan) strongconnect(v graph.Node) {
vID := v.ID()
// Set the depth index for v to the smallest unused index.
t.index++
t.indexTable[vID] = t.index
t.lowLink[vID] = t.index
t.stack = append(t.stack, v)
t.onStack.Add(vID)
// Consider successors of v.
for _, w := range t.succ(vID) {
wID := w.ID()
if t.indexTable[wID] == 0 {
// Successor w has not yet been visited; recur on it.
t.strongconnect(w)
t.lowLink[vID] = min(t.lowLink[vID], t.lowLink[wID])
} else if t.onStack.Has(wID) {
// Successor w is in stack s and hence in the current SCC.
t.lowLink[vID] = min(t.lowLink[vID], t.indexTable[wID])
}
}
// If v is a root node, pop the stack and generate an SCC.
if t.lowLink[vID] == t.indexTable[vID] {
// Start a new strongly connected component.
var (
scc []graph.Node
w graph.Node
)
for {
w, t.stack = t.stack[len(t.stack)-1], t.stack[:len(t.stack)-1]
t.onStack.Remove(w.ID())
// Add w to current strongly connected component.
scc = append(scc, w)
if w.ID() == vID {
break
}
}
// Output the current strongly connected component.
t.sccs = append(t.sccs, scc)
}
}
func min(a, b int) int {
if a < b {
return a
}
return b
}
@@ -0,0 +1,9 @@
// Copyright ©2015 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build tomita
package topo
const tomitaTanakaTakahashi = true
@@ -0,0 +1,68 @@
// Copyright ©2014 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package topo
import (
"gonum.org/v1/gonum/graph"
"gonum.org/v1/gonum/graph/traverse"
)
// IsPathIn returns whether path is a path in g.
//
// As special cases, IsPathIn returns true for a zero length path or for
// a path of length 1 when the node in path exists in the graph.
func IsPathIn(g graph.Graph, path []graph.Node) bool {
switch len(path) {
case 0:
return true
case 1:
return g.Has(path[0].ID())
default:
var canReach func(uid, vid int64) bool
switch g := g.(type) {
case graph.Directed:
canReach = g.HasEdgeFromTo
default:
canReach = g.HasEdgeBetween
}
for i, u := range path[:len(path)-1] {
if !canReach(u.ID(), path[i+1].ID()) {
return false
}
}
return true
}
}
// PathExistsIn returns whether there is a path in g starting at from extending
// to to.
//
// PathExistsIn exists as a helper function. If many tests for path existence
// are being performed, other approaches will be more efficient.
func PathExistsIn(g graph.Graph, from, to graph.Node) bool {
var t traverse.BreadthFirst
return t.Walk(g, from, func(n graph.Node, _ int) bool { return n.ID() == to.ID() }) != nil
}
// ConnectedComponents returns the connected components of the undirected graph g.
func ConnectedComponents(g graph.Undirected) [][]graph.Node {
var (
w traverse.DepthFirst
c []graph.Node
cc [][]graph.Node
)
during := func(n graph.Node) {
c = append(c, n)
}
after := func() {
cc = append(cc, []graph.Node(nil))
cc[len(cc)-1] = append(cc[len(cc)-1], c...)
c = c[:0]
}
w.WalkAll(g, nil, after, during)
return cc
}
@@ -0,0 +1,6 @@
// Copyright ©2017 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package traverse provides basic graph traversal primitives.
package traverse
@@ -0,0 +1,199 @@
// Copyright ©2015 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package traverse
import (
"gonum.org/v1/gonum/graph"
"gonum.org/v1/gonum/graph/internal/linear"
"gonum.org/v1/gonum/graph/internal/set"
)
var _ Graph = graph.Graph(nil)
// Graph is the subset of graph.Graph necessary for graph traversal.
type Graph interface {
// From returns all nodes that can be reached directly
// from the node with the given ID.
From(id int64) []graph.Node
// Edge returns the edge from u to v, with IDs uid and vid,
// if such an edge exists and nil otherwise. The node v
// must be directly reachable from u as defined by the
// From method.
Edge(uid, vid int64) graph.Edge
}
// BreadthFirst implements stateful breadth-first graph traversal.
type BreadthFirst struct {
EdgeFilter func(graph.Edge) bool
Visit func(u, v graph.Node)
queue linear.NodeQueue
visited set.Int64s
}
// Walk performs a breadth-first traversal of the graph g starting from the given node,
// depending on the the EdgeFilter field and the until parameter if they are non-nil. The
// traversal follows edges for which EdgeFilter(edge) is true and returns the first node
// for which until(node, depth) is true. During the traversal, if the Visit field is
// non-nil, it is called with the nodes joined by each followed edge.
func (b *BreadthFirst) Walk(g Graph, from graph.Node, until func(n graph.Node, d int) bool) graph.Node {
if b.visited == nil {
b.visited = make(set.Int64s)
}
b.queue.Enqueue(from)
b.visited.Add(from.ID())
var (
depth int
children int
untilNext = 1
)
for b.queue.Len() > 0 {
t := b.queue.Dequeue()
if until != nil && until(t, depth) {
return t
}
tid := t.ID()
for _, n := range g.From(tid) {
nid := n.ID()
if b.EdgeFilter != nil && !b.EdgeFilter(g.Edge(tid, nid)) {
continue
}
if b.visited.Has(nid) {
continue
}
if b.Visit != nil {
b.Visit(t, n)
}
b.visited.Add(nid)
children++
b.queue.Enqueue(n)
}
if untilNext--; untilNext == 0 {
depth++
untilNext = children
children = 0
}
}
return nil
}
// WalkAll calls Walk for each unvisited node of the graph g using edges independent
// of their direction. The functions before and after are called prior to commencing
// and after completing each walk if they are non-nil respectively. The function
// during is called on each node as it is traversed.
func (b *BreadthFirst) WalkAll(g graph.Undirected, before, after func(), during func(graph.Node)) {
b.Reset()
for _, from := range g.Nodes() {
if b.Visited(from) {
continue
}
if before != nil {
before()
}
b.Walk(g, from, func(n graph.Node, _ int) bool {
if during != nil {
during(n)
}
return false
})
if after != nil {
after()
}
}
}
// Visited returned whether the node n was visited during a traverse.
func (b *BreadthFirst) Visited(n graph.Node) bool {
return b.visited.Has(n.ID())
}
// Reset resets the state of the traverser for reuse.
func (b *BreadthFirst) Reset() {
b.queue.Reset()
b.visited = nil
}
// DepthFirst implements stateful depth-first graph traversal.
type DepthFirst struct {
EdgeFilter func(graph.Edge) bool
Visit func(u, v graph.Node)
stack linear.NodeStack
visited set.Int64s
}
// Walk performs a depth-first traversal of the graph g starting from the given node,
// depending on the the EdgeFilter field and the until parameter if they are non-nil. The
// traversal follows edges for which EdgeFilter(edge) is true and returns the first node
// for which until(node) is true. During the traversal, if the Visit field is non-nil, it
// is called with the nodes joined by each followed edge.
func (d *DepthFirst) Walk(g Graph, from graph.Node, until func(graph.Node) bool) graph.Node {
if d.visited == nil {
d.visited = make(set.Int64s)
}
d.stack.Push(from)
d.visited.Add(from.ID())
for d.stack.Len() > 0 {
t := d.stack.Pop()
if until != nil && until(t) {
return t
}
tid := t.ID()
for _, n := range g.From(tid) {
nid := n.ID()
if d.EdgeFilter != nil && !d.EdgeFilter(g.Edge(tid, nid)) {
continue
}
if d.visited.Has(nid) {
continue
}
if d.Visit != nil {
d.Visit(t, n)
}
d.visited.Add(nid)
d.stack.Push(n)
}
}
return nil
}
// WalkAll calls Walk for each unvisited node of the graph g using edges independent
// of their direction. The functions before and after are called prior to commencing
// and after completing each walk if they are non-nil respectively. The function
// during is called on each node as it is traversed.
func (d *DepthFirst) WalkAll(g graph.Undirected, before, after func(), during func(graph.Node)) {
d.Reset()
for _, from := range g.Nodes() {
if d.Visited(from) {
continue
}
if before != nil {
before()
}
d.Walk(g, from, func(n graph.Node) bool {
if during != nil {
during(n)
}
return false
})
if after != nil {
after()
}
}
}
// Visited returned whether the node n was visited during a traverse.
func (d *DepthFirst) Visited(n graph.Node) bool {
return d.visited.Has(n.ID())
}
// Reset resets the state of the traverser for reuse.
func (d *DepthFirst) Reset() {
d.stack = d.stack[:0]
d.visited = nil
}
@@ -0,0 +1,226 @@
// Copyright ©2015 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package graph
// Undirect converts a directed graph to an undirected graph.
type Undirect struct {
G Directed
}
var _ Undirected = Undirect{}
// Has returns whether the node exists within the graph.
func (g Undirect) Has(id int64) bool { return g.G.Has(id) }
// Nodes returns all the nodes in the graph.
func (g Undirect) Nodes() []Node { return g.G.Nodes() }
// From returns all nodes in g that can be reached directly from u.
func (g Undirect) From(uid int64) []Node {
var nodes []Node
seen := make(map[int64]struct{})
for _, n := range g.G.From(uid) {
seen[n.ID()] = struct{}{}
nodes = append(nodes, n)
}
for _, n := range g.G.To(uid) {
id := n.ID()
if _, ok := seen[id]; ok {
continue
}
seen[n.ID()] = struct{}{}
nodes = append(nodes, n)
}
return nodes
}
// HasEdgeBetween returns whether an edge exists between nodes x and y.
func (g Undirect) HasEdgeBetween(xid, yid int64) bool { return g.G.HasEdgeBetween(xid, yid) }
// Edge returns the edge from u to v if such an edge exists and nil otherwise.
// The node v must be directly reachable from u as defined by the From method.
// If an edge exists, the Edge returned is an EdgePair. The weight of
// the edge is determined by applying the Merge func to the weights of the
// edges between u and v.
func (g Undirect) Edge(uid, vid int64) Edge { return g.EdgeBetween(uid, vid) }
// EdgeBetween returns the edge between nodes x and y. If an edge exists, the
// Edge returned is an EdgePair. The weight of the edge is determined by
// applying the Merge func to the weights of edges between x and y.
func (g Undirect) EdgeBetween(xid, yid int64) Edge {
fe := g.G.Edge(xid, yid)
re := g.G.Edge(yid, xid)
if fe == nil && re == nil {
return nil
}
return EdgePair{fe, re}
}
// UndirectWeighted converts a directed weighted graph to an undirected weighted graph,
// resolving edge weight conflicts.
type UndirectWeighted struct {
G WeightedDirected
// Absent is the value used to
// represent absent edge weights
// passed to Merge if the reverse
// edge is present.
Absent float64
// Merge defines how discordant edge
// weights in G are resolved. A merge
// is performed if at least one edge
// exists between the nodes being
// considered. The edges corresponding
// to the two weights are also passed,
// in the same order.
// The order of weight parameters
// passed to Merge is not defined, so
// the function should be commutative.
// If Merge is nil, the arithmetic
// mean is used to merge weights.
Merge func(x, y float64, xe, ye Edge) float64
}
var (
_ Undirected = UndirectWeighted{}
_ WeightedUndirected = UndirectWeighted{}
)
// Has returns whether the node exists within the graph.
func (g UndirectWeighted) Has(id int64) bool { return g.G.Has(id) }
// Nodes returns all the nodes in the graph.
func (g UndirectWeighted) Nodes() []Node { return g.G.Nodes() }
// From returns all nodes in g that can be reached directly from u.
func (g UndirectWeighted) From(uid int64) []Node {
var nodes []Node
seen := make(map[int64]struct{})
for _, n := range g.G.From(uid) {
seen[n.ID()] = struct{}{}
nodes = append(nodes, n)
}
for _, n := range g.G.To(uid) {
id := n.ID()
if _, ok := seen[id]; ok {
continue
}
seen[n.ID()] = struct{}{}
nodes = append(nodes, n)
}
return nodes
}
// HasEdgeBetween returns whether an edge exists between nodes x and y.
func (g UndirectWeighted) HasEdgeBetween(xid, yid int64) bool { return g.G.HasEdgeBetween(xid, yid) }
// Edge returns the edge from u to v if such an edge exists and nil otherwise.
// The node v must be directly reachable from u as defined by the From method.
// If an edge exists, the Edge returned is an EdgePair. The weight of
// the edge is determined by applying the Merge func to the weights of the
// edges between u and v.
func (g UndirectWeighted) Edge(uid, vid int64) Edge { return g.WeightedEdgeBetween(uid, vid) }
// WeightedEdge returns the weighted edge from u to v if such an edge exists and nil otherwise.
// The node v must be directly reachable from u as defined by the From method.
// If an edge exists, the Edge returned is an EdgePair. The weight of
// the edge is determined by applying the Merge func to the weights of the
// edges between u and v.
func (g UndirectWeighted) WeightedEdge(uid, vid int64) WeightedEdge {
return g.WeightedEdgeBetween(uid, vid)
}
// EdgeBetween returns the edge between nodes x and y. If an edge exists, the
// Edge returned is an EdgePair. The weight of the edge is determined by
// applying the Merge func to the weights of edges between x and y.
func (g UndirectWeighted) EdgeBetween(xid, yid int64) Edge {
return g.WeightedEdgeBetween(xid, yid)
}
// WeightedEdgeBetween returns the weighted edge between nodes x and y. If an edge exists, the
// Edge returned is an EdgePair. The weight of the edge is determined by
// applying the Merge func to the weights of edges between x and y.
func (g UndirectWeighted) WeightedEdgeBetween(xid, yid int64) WeightedEdge {
fe := g.G.Edge(xid, yid)
re := g.G.Edge(yid, xid)
if fe == nil && re == nil {
return nil
}
f, ok := g.G.Weight(xid, yid)
if !ok {
f = g.Absent
}
r, ok := g.G.Weight(yid, xid)
if !ok {
r = g.Absent
}
var w float64
if g.Merge == nil {
w = (f + r) / 2
} else {
w = g.Merge(f, r, fe, re)
}
return WeightedEdgePair{EdgePair: [2]Edge{fe, re}, W: w}
}
// Weight returns the weight for the edge between x and y if Edge(x, y) returns a non-nil Edge.
// If x and y are the same node the internal node weight is returned. If there is no joining
// edge between the two nodes the weight value returned is zero. Weight returns true if an edge
// exists between x and y or if x and y have the same ID, false otherwise.
func (g UndirectWeighted) Weight(xid, yid int64) (w float64, ok bool) {
fe := g.G.Edge(xid, yid)
re := g.G.Edge(yid, xid)
f, fOk := g.G.Weight(xid, yid)
if !fOk {
f = g.Absent
}
r, rOK := g.G.Weight(yid, xid)
if !rOK {
r = g.Absent
}
ok = fOk || rOK
if g.Merge == nil {
return (f + r) / 2, ok
}
return g.Merge(f, r, fe, re), ok
}
// EdgePair is an opposed pair of directed edges.
type EdgePair [2]Edge
// From returns the from node of the first non-nil edge, or nil.
func (e EdgePair) From() Node {
if e[0] != nil {
return e[0].From()
} else if e[1] != nil {
return e[1].From()
}
return nil
}
// To returns the to node of the first non-nil edge, or nil.
func (e EdgePair) To() Node {
if e[0] != nil {
return e[0].To()
} else if e[1] != nil {
return e[1].To()
}
return nil
}
// WeightedEdgePair is an opposed pair of directed edges.
type WeightedEdgePair struct {
EdgePair
W float64
}
// Weight returns the merged edge weights of the two edges.
func (e WeightedEdgePair) Weight() float64 { return e.W }
@@ -0,0 +1,134 @@
// Copyright ©2016 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//+build !noasm,!appengine,!safe
#include "textflag.h"
// MOVDDUP X2, X3
#define MOVDDUP_X2_X3 BYTE $0xF2; BYTE $0x0F; BYTE $0x12; BYTE $0xDA
// MOVDDUP X4, X5
#define MOVDDUP_X4_X5 BYTE $0xF2; BYTE $0x0F; BYTE $0x12; BYTE $0xEC
// MOVDDUP X6, X7
#define MOVDDUP_X6_X7 BYTE $0xF2; BYTE $0x0F; BYTE $0x12; BYTE $0xFE
// MOVDDUP X8, X9
#define MOVDDUP_X8_X9 BYTE $0xF2; BYTE $0x45; BYTE $0x0F; BYTE $0x12; BYTE $0xC8
// ADDSUBPD X2, X3
#define ADDSUBPD_X2_X3 BYTE $0x66; BYTE $0x0F; BYTE $0xD0; BYTE $0xDA
// ADDSUBPD X4, X5
#define ADDSUBPD_X4_X5 BYTE $0x66; BYTE $0x0F; BYTE $0xD0; BYTE $0xEC
// ADDSUBPD X6, X7
#define ADDSUBPD_X6_X7 BYTE $0x66; BYTE $0x0F; BYTE $0xD0; BYTE $0xFE
// ADDSUBPD X8, X9
#define ADDSUBPD_X8_X9 BYTE $0x66; BYTE $0x45; BYTE $0x0F; BYTE $0xD0; BYTE $0xC8
// func AxpyInc(alpha complex128, x, y []complex128, n, incX, incY, ix, iy uintptr)
TEXT ·AxpyInc(SB), NOSPLIT, $0
MOVQ x_base+16(FP), SI // SI = &x
MOVQ y_base+40(FP), DI // DI = &y
MOVQ n+64(FP), CX // CX = n
CMPQ CX, $0 // if n==0 { return }
JE axpyi_end
MOVQ ix+88(FP), R8 // R8 = ix // Load the first index
SHLQ $4, R8 // R8 *= sizeof(complex128)
MOVQ iy+96(FP), R9 // R9 = iy
SHLQ $4, R9 // R9 *= sizeof(complex128)
LEAQ (SI)(R8*1), SI // SI = &(x[ix])
LEAQ (DI)(R9*1), DI // DI = &(y[iy])
MOVQ DI, DX // DX = DI // Separate Read/Write pointers
MOVQ incX+72(FP), R8 // R8 = incX
SHLQ $4, R8 // R8 *= sizeof(complex128)
MOVQ incY+80(FP), R9 // R9 = iy
SHLQ $4, R9 // R9 *= sizeof(complex128)
MOVUPS alpha+0(FP), X0 // X0 = { imag(a), real(a) }
MOVAPS X0, X1
SHUFPD $0x1, X1, X1 // X1 = { real(a), imag(a) }
MOVAPS X0, X10 // Copy X0 and X1 for pipelining
MOVAPS X1, X11
MOVQ CX, BX
ANDQ $3, CX // CX = n % 4
SHRQ $2, BX // BX = floor( n / 4 )
JZ axpyi_tail // if BX == 0 { goto axpyi_tail }
axpyi_loop: // do {
MOVUPS (SI), X2 // X_i = { imag(x[i]), real(x[i]) }
MOVUPS (SI)(R8*1), X4
LEAQ (SI)(R8*2), SI // SI = &(SI[incX*2])
MOVUPS (SI), X6
MOVUPS (SI)(R8*1), X8
// X_(i+1) = { real(x[i], real(x[i]) }
MOVDDUP_X2_X3
MOVDDUP_X4_X5
MOVDDUP_X6_X7
MOVDDUP_X8_X9
// X_i = { imag(x[i]), imag(x[i]) }
SHUFPD $0x3, X2, X2
SHUFPD $0x3, X4, X4
SHUFPD $0x3, X6, X6
SHUFPD $0x3, X8, X8
// X_i = { real(a) * imag(x[i]), imag(a) * imag(x[i]) }
// X_(i+1) = { imag(a) * real(x[i]), real(a) * real(x[i]) }
MULPD X1, X2
MULPD X0, X3
MULPD X11, X4
MULPD X10, X5
MULPD X1, X6
MULPD X0, X7
MULPD X11, X8
MULPD X10, X9
// X_(i+1) = {
// imag(result[i]): imag(a)*real(x[i]) + real(a)*imag(x[i]),
// real(result[i]): real(a)*real(x[i]) - imag(a)*imag(x[i])
// }
ADDSUBPD_X2_X3
ADDSUBPD_X4_X5
ADDSUBPD_X6_X7
ADDSUBPD_X8_X9
// X_(i+1) = { imag(result[i]) + imag(y[i]), real(result[i]) + real(y[i]) }
ADDPD (DX), X3
ADDPD (DX)(R9*1), X5
LEAQ (DX)(R9*2), DX // DX = &(DX[incY*2])
ADDPD (DX), X7
ADDPD (DX)(R9*1), X9
MOVUPS X3, (DI) // dst[i] = X_(i+1)
MOVUPS X5, (DI)(R9*1)
LEAQ (DI)(R9*2), DI
MOVUPS X7, (DI)
MOVUPS X9, (DI)(R9*1)
LEAQ (SI)(R8*2), SI // SI = &(SI[incX*2])
LEAQ (DX)(R9*2), DX // DX = &(DX[incY*2])
LEAQ (DI)(R9*2), DI // DI = &(DI[incY*2])
DECQ BX
JNZ axpyi_loop // } while --BX > 0
CMPQ CX, $0 // if CX == 0 { return }
JE axpyi_end
axpyi_tail: // do {
MOVUPS (SI), X2 // X_i = { imag(x[i]), real(x[i]) }
MOVDDUP_X2_X3 // X_(i+1) = { real(x[i], real(x[i]) }
SHUFPD $0x3, X2, X2 // X_i = { imag(x[i]), imag(x[i]) }
MULPD X1, X2 // X_i = { real(a) * imag(x[i]), imag(a) * imag(x[i]) }
MULPD X0, X3 // X_(i+1) = { imag(a) * real(x[i]), real(a) * real(x[i]) }
// X_(i+1) = {
// imag(result[i]): imag(a)*real(x[i]) + real(a)*imag(x[i]),
// real(result[i]): real(a)*real(x[i]) - imag(a)*imag(x[i])
// }
ADDSUBPD_X2_X3
// X_(i+1) = { imag(result[i]) + imag(y[i]), real(result[i]) + real(y[i]) }
ADDPD (DI), X3
MOVUPS X3, (DI) // y[i] = X_i
ADDQ R8, SI // SI = &(SI[incX])
ADDQ R9, DI // DI = &(DI[incY])
LOOP axpyi_tail // } while --CX > 0
axpyi_end:
RET
@@ -0,0 +1,141 @@
// Copyright ©2016 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//+build !noasm,!appengine,!safe
#include "textflag.h"
// MOVDDUP X2, X3
#define MOVDDUP_X2_X3 BYTE $0xF2; BYTE $0x0F; BYTE $0x12; BYTE $0xDA
// MOVDDUP X4, X5
#define MOVDDUP_X4_X5 BYTE $0xF2; BYTE $0x0F; BYTE $0x12; BYTE $0xEC
// MOVDDUP X6, X7
#define MOVDDUP_X6_X7 BYTE $0xF2; BYTE $0x0F; BYTE $0x12; BYTE $0xFE
// MOVDDUP X8, X9
#define MOVDDUP_X8_X9 BYTE $0xF2; BYTE $0x45; BYTE $0x0F; BYTE $0x12; BYTE $0xC8
// ADDSUBPD X2, X3
#define ADDSUBPD_X2_X3 BYTE $0x66; BYTE $0x0F; BYTE $0xD0; BYTE $0xDA
// ADDSUBPD X4, X5
#define ADDSUBPD_X4_X5 BYTE $0x66; BYTE $0x0F; BYTE $0xD0; BYTE $0xEC
// ADDSUBPD X6, X7
#define ADDSUBPD_X6_X7 BYTE $0x66; BYTE $0x0F; BYTE $0xD0; BYTE $0xFE
// ADDSUBPD X8, X9
#define ADDSUBPD_X8_X9 BYTE $0x66; BYTE $0x45; BYTE $0x0F; BYTE $0xD0; BYTE $0xC8
// func AxpyIncTo(dst []complex128, incDst, idst uintptr, alpha complex128, x, y []complex128, n, incX, incY, ix, iy uintptr)
TEXT ·AxpyIncTo(SB), NOSPLIT, $0
MOVQ dst_base+0(FP), DI // DI = &dst
MOVQ x_base+56(FP), SI // SI = &x
MOVQ y_base+80(FP), DX // DX = &y
MOVQ n+104(FP), CX // CX = n
CMPQ CX, $0 // if n==0 { return }
JE axpyi_end
MOVQ ix+128(FP), R8 // R8 = ix // Load the first index
SHLQ $4, R8 // R8 *= sizeof(complex128)
MOVQ iy+136(FP), R9 // R9 = iy
SHLQ $4, R9 // R9 *= sizeof(complex128)
MOVQ idst+32(FP), R10 // R10 = idst
SHLQ $4, R10 // R10 *= sizeof(complex128)
LEAQ (SI)(R8*1), SI // SI = &(x[ix])
LEAQ (DX)(R9*1), DX // DX = &(y[iy])
LEAQ (DI)(R10*1), DI // DI = &(dst[idst])
MOVQ incX+112(FP), R8 // R8 = incX
SHLQ $4, R8 // R8 *= sizeof(complex128)
MOVQ incY+120(FP), R9 // R9 = incY
SHLQ $4, R9 // R9 *= sizeof(complex128)
MOVQ incDst+24(FP), R10 // R10 = incDst
SHLQ $4, R10 // R10 *= sizeof(complex128)
MOVUPS alpha+40(FP), X0 // X0 = { imag(a), real(a) }
MOVAPS X0, X1
SHUFPD $0x1, X1, X1 // X1 = { real(a), imag(a) }
MOVAPS X0, X10 // Copy X0 and X1 for pipelining
MOVAPS X1, X11
MOVQ CX, BX
ANDQ $3, CX // CX = n % 4
SHRQ $2, BX // BX = floor( n / 4 )
JZ axpyi_tail // if BX == 0 { goto axpyi_tail }
axpyi_loop: // do {
MOVUPS (SI), X2 // X_i = { imag(x[i]), real(x[i]) }
MOVUPS (SI)(R8*1), X4
LEAQ (SI)(R8*2), SI // SI = &(SI[incX*2])
MOVUPS (SI), X6
MOVUPS (SI)(R8*1), X8
// X_(i+1) = { real(x[i], real(x[i]) }
MOVDDUP_X2_X3
MOVDDUP_X4_X5
MOVDDUP_X6_X7
MOVDDUP_X8_X9
// X_i = { imag(x[i]), imag(x[i]) }
SHUFPD $0x3, X2, X2
SHUFPD $0x3, X4, X4
SHUFPD $0x3, X6, X6
SHUFPD $0x3, X8, X8
// X_i = { real(a) * imag(x[i]), imag(a) * imag(x[i]) }
// X_(i+1) = { imag(a) * real(x[i]), real(a) * real(x[i]) }
MULPD X1, X2
MULPD X0, X3
MULPD X11, X4
MULPD X10, X5
MULPD X1, X6
MULPD X0, X7
MULPD X11, X8
MULPD X10, X9
// X_(i+1) = {
// imag(result[i]): imag(a)*real(x[i]) + real(a)*imag(x[i]),
// real(result[i]): real(a)*real(x[i]) - imag(a)*imag(x[i])
// }
ADDSUBPD_X2_X3
ADDSUBPD_X4_X5
ADDSUBPD_X6_X7
ADDSUBPD_X8_X9
// X_(i+1) = { imag(result[i]) + imag(y[i]), real(result[i]) + real(y[i]) }
ADDPD (DX), X3
ADDPD (DX)(R9*1), X5
LEAQ (DX)(R9*2), DX // DX = &(DX[incY*2])
ADDPD (DX), X7
ADDPD (DX)(R9*1), X9
MOVUPS X3, (DI) // dst[i] = X_(i+1)
MOVUPS X5, (DI)(R10*1)
LEAQ (DI)(R10*2), DI
MOVUPS X7, (DI)
MOVUPS X9, (DI)(R10*1)
LEAQ (SI)(R8*2), SI // SI = &(SI[incX*2])
LEAQ (DX)(R9*2), DX // DX = &(DX[incY*2])
LEAQ (DI)(R10*2), DI // DI = &(DI[incDst*2])
DECQ BX
JNZ axpyi_loop // } while --BX > 0
CMPQ CX, $0 // if CX == 0 { return }
JE axpyi_end
axpyi_tail: // do {
MOVUPS (SI), X2 // X_i = { imag(x[i]), real(x[i]) }
MOVDDUP_X2_X3 // X_(i+1) = { real(x[i], real(x[i]) }
SHUFPD $0x3, X2, X2 // X_i = { imag(x[i]), imag(x[i]) }
MULPD X1, X2 // X_i = { real(a) * imag(x[i]), imag(a) * imag(x[i]) }
MULPD X0, X3 // X_(i+1) = { imag(a) * real(x[i]), real(a) * real(x[i]) }
// X_(i+1) = {
// imag(result[i]): imag(a)*real(x[i]) + real(a)*imag(x[i]),
// real(result[i]): real(a)*real(x[i]) - imag(a)*imag(x[i])
// }
ADDSUBPD_X2_X3
// X_(i+1) = { imag(result[i]) + imag(y[i]), real(result[i]) + real(y[i]) }
ADDPD (DX), X3
MOVUPS X3, (DI) // y[i] X_(i+1)
ADDQ R8, SI // SI += incX
ADDQ R9, DX // DX += incY
ADDQ R10, DI // DI += incDst
LOOP axpyi_tail // } while --CX > 0
axpyi_end:
RET
@@ -0,0 +1,122 @@
// Copyright ©2016 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//+build !noasm,!appengine,!safe
#include "textflag.h"
// MOVDDUP X2, X3
#define MOVDDUP_X2_X3 BYTE $0xF2; BYTE $0x0F; BYTE $0x12; BYTE $0xDA
// MOVDDUP X4, X5
#define MOVDDUP_X4_X5 BYTE $0xF2; BYTE $0x0F; BYTE $0x12; BYTE $0xEC
// MOVDDUP X6, X7
#define MOVDDUP_X6_X7 BYTE $0xF2; BYTE $0x0F; BYTE $0x12; BYTE $0xFE
// MOVDDUP X8, X9
#define MOVDDUP_X8_X9 BYTE $0xF2; BYTE $0x45; BYTE $0x0F; BYTE $0x12; BYTE $0xC8
// ADDSUBPD X2, X3
#define ADDSUBPD_X2_X3 BYTE $0x66; BYTE $0x0F; BYTE $0xD0; BYTE $0xDA
// ADDSUBPD X4, X5
#define ADDSUBPD_X4_X5 BYTE $0x66; BYTE $0x0F; BYTE $0xD0; BYTE $0xEC
// ADDSUBPD X6, X7
#define ADDSUBPD_X6_X7 BYTE $0x66; BYTE $0x0F; BYTE $0xD0; BYTE $0xFE
// ADDSUBPD X8, X9
#define ADDSUBPD_X8_X9 BYTE $0x66; BYTE $0x45; BYTE $0x0F; BYTE $0xD0; BYTE $0xC8
// func AxpyUnitary(alpha complex128, x, y []complex128)
TEXT ·AxpyUnitary(SB), NOSPLIT, $0
MOVQ x_base+16(FP), SI // SI = &x
MOVQ y_base+40(FP), DI // DI = &y
MOVQ x_len+24(FP), CX // CX = min( len(x), len(y) )
CMPQ y_len+48(FP), CX
CMOVQLE y_len+48(FP), CX
CMPQ CX, $0 // if CX == 0 { return }
JE caxy_end
PXOR X0, X0 // Clear work registers and cache-align loop
PXOR X1, X1
MOVUPS alpha+0(FP), X0 // X0 = { imag(a), real(a) }
MOVAPS X0, X1
SHUFPD $0x1, X1, X1 // X1 = { real(a), imag(a) }
XORQ AX, AX // i = 0
MOVAPS X0, X10 // Copy X0 and X1 for pipelining
MOVAPS X1, X11
MOVQ CX, BX
ANDQ $3, CX // CX = n % 4
SHRQ $2, BX // BX = floor( n / 4 )
JZ caxy_tail // if BX == 0 { goto caxy_tail }
caxy_loop: // do {
MOVUPS (SI)(AX*8), X2 // X_i = { imag(x[i]), real(x[i]) }
MOVUPS 16(SI)(AX*8), X4
MOVUPS 32(SI)(AX*8), X6
MOVUPS 48(SI)(AX*8), X8
// X_(i+1) = { real(x[i], real(x[i]) }
MOVDDUP_X2_X3
MOVDDUP_X4_X5
MOVDDUP_X6_X7
MOVDDUP_X8_X9
// X_i = { imag(x[i]), imag(x[i]) }
SHUFPD $0x3, X2, X2
SHUFPD $0x3, X4, X4
SHUFPD $0x3, X6, X6
SHUFPD $0x3, X8, X8
// X_i = { real(a) * imag(x[i]), imag(a) * imag(x[i]) }
// X_(i+1) = { imag(a) * real(x[i]), real(a) * real(x[i]) }
MULPD X1, X2
MULPD X0, X3
MULPD X11, X4
MULPD X10, X5
MULPD X1, X6
MULPD X0, X7
MULPD X11, X8
MULPD X10, X9
// X_(i+1) = {
// imag(result[i]): imag(a)*real(x[i]) + real(a)*imag(x[i]),
// real(result[i]): real(a)*real(x[i]) - imag(a)*imag(x[i])
// }
ADDSUBPD_X2_X3
ADDSUBPD_X4_X5
ADDSUBPD_X6_X7
ADDSUBPD_X8_X9
// X_(i+1) = { imag(result[i]) + imag(y[i]), real(result[i]) + real(y[i]) }
ADDPD (DI)(AX*8), X3
ADDPD 16(DI)(AX*8), X5
ADDPD 32(DI)(AX*8), X7
ADDPD 48(DI)(AX*8), X9
MOVUPS X3, (DI)(AX*8) // y[i] = X_(i+1)
MOVUPS X5, 16(DI)(AX*8)
MOVUPS X7, 32(DI)(AX*8)
MOVUPS X9, 48(DI)(AX*8)
ADDQ $8, AX // i += 8
DECQ BX
JNZ caxy_loop // } while --BX > 0
CMPQ CX, $0 // if CX == 0 { return }
JE caxy_end
caxy_tail: // do {
MOVUPS (SI)(AX*8), X2 // X_i = { imag(x[i]), real(x[i]) }
MOVDDUP_X2_X3 // X_(i+1) = { real(x[i], real(x[i]) }
SHUFPD $0x3, X2, X2 // X_i = { imag(x[i]), imag(x[i]) }
MULPD X1, X2 // X_i = { real(a) * imag(x[i]), imag(a) * imag(x[i]) }
MULPD X0, X3 // X_(i+1) = { imag(a) * real(x[i]), real(a) * real(x[i]) }
// X_(i+1) = {
// imag(result[i]): imag(a)*real(x[i]) + real(a)*imag(x[i]),
// real(result[i]): real(a)*real(x[i]) - imag(a)*imag(x[i])
// }
ADDSUBPD_X2_X3
// X_(i+1) = { imag(result[i]) + imag(y[i]), real(result[i]) + real(y[i]) }
ADDPD (DI)(AX*8), X3
MOVUPS X3, (DI)(AX*8) // y[i] = X_(i+1)
ADDQ $2, AX // i += 2
LOOP caxy_tail // } while --CX > 0
caxy_end:
RET
@@ -0,0 +1,123 @@
// Copyright ©2016 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//+build !noasm,!appengine,!safe
#include "textflag.h"
// MOVDDUP X2, X3
#define MOVDDUP_X2_X3 BYTE $0xF2; BYTE $0x0F; BYTE $0x12; BYTE $0xDA
// MOVDDUP X4, X5
#define MOVDDUP_X4_X5 BYTE $0xF2; BYTE $0x0F; BYTE $0x12; BYTE $0xEC
// MOVDDUP X6, X7
#define MOVDDUP_X6_X7 BYTE $0xF2; BYTE $0x0F; BYTE $0x12; BYTE $0xFE
// MOVDDUP X8, X9
#define MOVDDUP_X8_X9 BYTE $0xF2; BYTE $0x45; BYTE $0x0F; BYTE $0x12; BYTE $0xC8
// ADDSUBPD X2, X3
#define ADDSUBPD_X2_X3 BYTE $0x66; BYTE $0x0F; BYTE $0xD0; BYTE $0xDA
// ADDSUBPD X4, X5
#define ADDSUBPD_X4_X5 BYTE $0x66; BYTE $0x0F; BYTE $0xD0; BYTE $0xEC
// ADDSUBPD X6, X7
#define ADDSUBPD_X6_X7 BYTE $0x66; BYTE $0x0F; BYTE $0xD0; BYTE $0xFE
// ADDSUBPD X8, X9
#define ADDSUBPD_X8_X9 BYTE $0x66; BYTE $0x45; BYTE $0x0F; BYTE $0xD0; BYTE $0xC8
// func AxpyUnitaryTo(dst []complex128, alpha complex64, x, y []complex128)
TEXT ·AxpyUnitaryTo(SB), NOSPLIT, $0
MOVQ dst_base+0(FP), DI // DI = &dst
MOVQ x_base+40(FP), SI // SI = &x
MOVQ y_base+64(FP), DX // DX = &y
MOVQ x_len+48(FP), CX // CX = min( len(x), len(y), len(dst) )
CMPQ y_len+72(FP), CX
CMOVQLE y_len+72(FP), CX
CMPQ dst_len+8(FP), CX
CMOVQLE dst_len+8(FP), CX
CMPQ CX, $0 // if CX == 0 { return }
JE caxy_end
MOVUPS alpha+24(FP), X0 // X0 = { imag(a), real(a) }
MOVAPS X0, X1
SHUFPD $0x1, X1, X1 // X1 = { real(a), imag(a) }
XORQ AX, AX // i = 0
MOVAPS X0, X10 // Copy X0 and X1 for pipelining
MOVAPS X1, X11
MOVQ CX, BX
ANDQ $3, CX // CX = n % 4
SHRQ $2, BX // BX = floor( n / 4 )
JZ caxy_tail // if BX == 0 { goto caxy_tail }
caxy_loop: // do {
MOVUPS (SI)(AX*8), X2 // X_i = { imag(x[i]), real(x[i]) }
MOVUPS 16(SI)(AX*8), X4
MOVUPS 32(SI)(AX*8), X6
MOVUPS 48(SI)(AX*8), X8
// X_(i+1) = { real(x[i], real(x[i]) }
MOVDDUP_X2_X3 // Load and duplicate imag elements (xi, xi)
MOVDDUP_X4_X5
MOVDDUP_X6_X7
MOVDDUP_X8_X9
// X_i = { imag(x[i]), imag(x[i]) }
SHUFPD $0x3, X2, X2 // duplicate real elements (xr, xr)
SHUFPD $0x3, X4, X4
SHUFPD $0x3, X6, X6
SHUFPD $0x3, X8, X8
// X_i = { real(a) * imag(x[i]), imag(a) * imag(x[i]) }
// X_(i+1) = { imag(a) * real(x[i]), real(a) * real(x[i]) }
MULPD X1, X2
MULPD X0, X3
MULPD X11, X4
MULPD X10, X5
MULPD X1, X6
MULPD X0, X7
MULPD X11, X8
MULPD X10, X9
// X_(i+1) = {
// imag(result[i]): imag(a)*real(x[i]) + real(a)*imag(x[i]),
// real(result[i]): real(a)*real(x[i]) - imag(a)*imag(x[i])
// }
ADDSUBPD_X2_X3
ADDSUBPD_X4_X5
ADDSUBPD_X6_X7
ADDSUBPD_X8_X9
// X_(i+1) = { imag(result[i]) + imag(y[i]), real(result[i]) + real(y[i]) }
ADDPD (DX)(AX*8), X3
ADDPD 16(DX)(AX*8), X5
ADDPD 32(DX)(AX*8), X7
ADDPD 48(DX)(AX*8), X9
MOVUPS X3, (DI)(AX*8) // y[i] = X_(i+1)
MOVUPS X5, 16(DI)(AX*8)
MOVUPS X7, 32(DI)(AX*8)
MOVUPS X9, 48(DI)(AX*8)
ADDQ $8, AX // i += 8
DECQ BX
JNZ caxy_loop // } while --BX > 0
CMPQ CX, $0 // if CX == 0 { return }
JE caxy_end
caxy_tail: // Same calculation, but read in values to avoid trampling memory
MOVUPS (SI)(AX*8), X2 // X_i = { imag(x[i]), real(x[i]) }
MOVDDUP_X2_X3 // X_(i+1) = { real(x[i], real(x[i]) }
SHUFPD $0x3, X2, X2 // X_i = { imag(x[i]), imag(x[i]) }
MULPD X1, X2 // X_i = { real(a) * imag(x[i]), imag(a) * imag(x[i]) }
MULPD X0, X3 // X_(i+1) = { imag(a) * real(x[i]), real(a) * real(x[i]) }
// X_(i+1) = {
// imag(result[i]): imag(a)*real(x[i]) + real(a)*imag(x[i]),
// real(result[i]): real(a)*real(x[i]) - imag(a)*imag(x[i])
// }
ADDSUBPD_X2_X3
// X_(i+1) = { imag(result[i]) + imag(y[i]), real(result[i]) + real(y[i]) }
ADDPD (DX)(AX*8), X3
MOVUPS X3, (DI)(AX*8) // y[i] = X_(i+1)
ADDQ $2, AX // i += 2
LOOP caxy_tail // } while --CX > 0
caxy_end:
RET
@@ -0,0 +1,6 @@
// Copyright ©2017 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package c128 provides complex128 vector primitives.
package c128
@@ -0,0 +1,153 @@
// Copyright ©2016 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//+build !noasm,!appengine,!safe
#include "textflag.h"
#define MOVDDUP_XPTR__X3 LONG $0x1E120FF2 // MOVDDUP (SI), X3
#define MOVDDUP_XPTR_INCX__X5 LONG $0x120F42F2; WORD $0x062C // MOVDDUP (SI)(R8*1), X5
#define MOVDDUP_XPTR_INCX_2__X7 LONG $0x120F42F2; WORD $0x463C // MOVDDUP (SI)(R8*2), X7
#define MOVDDUP_XPTR_INCx3X__X9 LONG $0x120F46F2; WORD $0x0E0C // MOVDDUP (SI)(R9*1), X9
#define MOVDDUP_8_XPTR__X2 LONG $0x56120FF2; BYTE $0x08 // MOVDDUP 8(SI), X2
#define MOVDDUP_8_XPTR_INCX__X4 LONG $0x120F42F2; WORD $0x0664; BYTE $0x08 // MOVDDUP 8(SI)(R8*1), X4
#define MOVDDUP_8_XPTR_INCX_2__X6 LONG $0x120F42F2; WORD $0x4674; BYTE $0x08 // MOVDDUP 8(SI)(R8*2), X6
#define MOVDDUP_8_XPTR_INCx3X__X8 LONG $0x120F46F2; WORD $0x0E44; BYTE $0x08 // MOVDDUP 8(SI)(R9*1), X8
#define ADDSUBPD_X2_X3 LONG $0xDAD00F66 // ADDSUBPD X2, X3
#define ADDSUBPD_X4_X5 LONG $0xECD00F66 // ADDSUBPD X4, X5
#define ADDSUBPD_X6_X7 LONG $0xFED00F66 // ADDSUBPD X6, X7
#define ADDSUBPD_X8_X9 LONG $0xD00F4566; BYTE $0xC8 // ADDSUBPD X8, X9
#define X_PTR SI
#define Y_PTR DI
#define LEN CX
#define TAIL BX
#define SUM X0
#define P_SUM X1
#define INC_X R8
#define INCx3_X R9
#define INC_Y R10
#define INCx3_Y R11
#define NEG1 X15
#define P_NEG1 X14
// func DotcInc(x, y []complex128, n, incX, incY, ix, iy uintptr) (sum complex128)
TEXT ·DotcInc(SB), NOSPLIT, $0
MOVQ x_base+0(FP), X_PTR // X_PTR = &x
MOVQ y_base+24(FP), Y_PTR // Y_PTR = &y
MOVQ n+48(FP), LEN // LEN = n
PXOR SUM, SUM // SUM = 0
CMPQ LEN, $0 // if LEN == 0 { return }
JE dot_end
PXOR P_SUM, P_SUM // P_SUM = 0
MOVQ ix+72(FP), INC_X // INC_X = ix * sizeof(complex128)
SHLQ $4, INC_X
MOVQ iy+80(FP), INC_Y // INC_Y = iy * sizeof(complex128)
SHLQ $4, INC_Y
LEAQ (X_PTR)(INC_X*1), X_PTR // X_PTR = &(X_PTR[ix])
LEAQ (Y_PTR)(INC_Y*1), Y_PTR // Y_PTR = &(Y_PTR[iy])
MOVQ incX+56(FP), INC_X // INC_X = incX
SHLQ $4, INC_X // INC_X *= sizeof(complex128)
MOVQ incY+64(FP), INC_Y // INC_Y = incY
SHLQ $4, INC_Y // INC_Y *= sizeof(complex128)
MOVSD $(-1.0), NEG1
SHUFPD $0, NEG1, NEG1 // { -1, -1 }
MOVQ LEN, TAIL
ANDQ $3, TAIL // TAIL = n % 4
SHRQ $2, LEN // LEN = floor( n / 4 )
JZ dot_tail // if n <= 4 { goto dot_tail }
MOVAPS NEG1, P_NEG1 // Copy NEG1 to P_NEG1 for pipelining
LEAQ (INC_X)(INC_X*2), INCx3_X // INCx3_X = 3 * incX * sizeof(complex128)
LEAQ (INC_Y)(INC_Y*2), INCx3_Y // INCx3_Y = 3 * incY * sizeof(complex128)
dot_loop: // do {
MOVDDUP_XPTR__X3 // X_(i+1) = { real(x[i], real(x[i]) }
MOVDDUP_XPTR_INCX__X5
MOVDDUP_XPTR_INCX_2__X7
MOVDDUP_XPTR_INCx3X__X9
MOVDDUP_8_XPTR__X2 // X_i = { imag(x[i]), imag(x[i]) }
MOVDDUP_8_XPTR_INCX__X4
MOVDDUP_8_XPTR_INCX_2__X6
MOVDDUP_8_XPTR_INCx3X__X8
// X_i = { -imag(x[i]), -imag(x[i]) }
MULPD NEG1, X2
MULPD P_NEG1, X4
MULPD NEG1, X6
MULPD P_NEG1, X8
// X_j = { imag(y[i]), real(y[i]) }
MOVUPS (Y_PTR), X10
MOVUPS (Y_PTR)(INC_Y*1), X11
MOVUPS (Y_PTR)(INC_Y*2), X12
MOVUPS (Y_PTR)(INCx3_Y*1), X13
// X_(i+1) = { imag(a) * real(x[i]), real(a) * real(x[i]) }
MULPD X10, X3
MULPD X11, X5
MULPD X12, X7
MULPD X13, X9
// X_j = { real(y[i]), imag(y[i]) }
SHUFPD $0x1, X10, X10
SHUFPD $0x1, X11, X11
SHUFPD $0x1, X12, X12
SHUFPD $0x1, X13, X13
// X_i = { real(a) * imag(x[i]), imag(a) * imag(x[i]) }
MULPD X10, X2
MULPD X11, X4
MULPD X12, X6
MULPD X13, X8
// X_(i+1) = {
// imag(result[i]): imag(a)*real(x[i]) + real(a)*imag(x[i]),
// real(result[i]): real(a)*real(x[i]) - imag(a)*imag(x[i])
// }
ADDSUBPD_X2_X3
ADDSUBPD_X4_X5
ADDSUBPD_X6_X7
ADDSUBPD_X8_X9
// psum += result[i]
ADDPD X3, SUM
ADDPD X5, P_SUM
ADDPD X7, SUM
ADDPD X9, P_SUM
LEAQ (X_PTR)(INC_X*4), X_PTR // X_PTR = &(X_PTR[incX*4])
LEAQ (Y_PTR)(INC_Y*4), Y_PTR // Y_PTR = &(Y_PTR[incY*4])
DECQ LEN
JNZ dot_loop // } while --LEN > 0
ADDPD P_SUM, SUM // sum += psum
CMPQ TAIL, $0 // if TAIL == 0 { return }
JE dot_end
dot_tail: // do {
MOVDDUP_XPTR__X3 // X_(i+1) = { real(x[i], real(x[i]) }
MOVDDUP_8_XPTR__X2 // X_i = { imag(x[i]), imag(x[i]) }
MULPD NEG1, X2 // X_i = { -imag(x[i]) , -imag(x[i]) }
MOVUPS (Y_PTR), X10 // X_j = { imag(y[i]) , real(y[i]) }
MULPD X10, X3 // X_(i+1) = { imag(a) * real(x[i]), real(a) * real(x[i]) }
SHUFPD $0x1, X10, X10 // X_j = { real(y[i]) , imag(y[i]) }
MULPD X10, X2 // X_i = { real(a) * imag(x[i]), imag(a) * imag(x[i]) }
// X_(i+1) = {
// imag(result[i]): imag(a)*real(x[i]) + real(a)*imag(x[i]),
// real(result[i]): real(a)*real(x[i]) - imag(a)*imag(x[i])
// }
ADDSUBPD_X2_X3
ADDPD X3, SUM // sum += result[i]
ADDQ INC_X, X_PTR // X_PTR += incX
ADDQ INC_Y, Y_PTR // Y_PTR += incY
DECQ TAIL
JNZ dot_tail // } while --TAIL > 0
dot_end:
MOVUPS SUM, sum+88(FP)
RET
@@ -0,0 +1,143 @@
// Copyright ©2016 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//+build !noasm,!appengine,!safe
#include "textflag.h"
#define MOVDDUP_XPTR_IDX_8__X3 LONG $0x1C120FF2; BYTE $0xC6 // MOVDDUP (SI)(AX*8), X3
#define MOVDDUP_16_XPTR_IDX_8__X5 LONG $0x6C120FF2; WORD $0x10C6 // MOVDDUP 16(SI)(AX*8), X5
#define MOVDDUP_32_XPTR_IDX_8__X7 LONG $0x7C120FF2; WORD $0x20C6 // MOVDDUP 32(SI)(AX*8), X7
#define MOVDDUP_48_XPTR_IDX_8__X9 LONG $0x120F44F2; WORD $0xC64C; BYTE $0x30 // MOVDDUP 48(SI)(AX*8), X9
#define MOVDDUP_XPTR_IIDX_8__X2 LONG $0x14120FF2; BYTE $0xD6 // MOVDDUP (SI)(DX*8), X2
#define MOVDDUP_16_XPTR_IIDX_8__X4 LONG $0x64120FF2; WORD $0x10D6 // MOVDDUP 16(SI)(DX*8), X4
#define MOVDDUP_32_XPTR_IIDX_8__X6 LONG $0x74120FF2; WORD $0x20D6 // MOVDDUP 32(SI)(DX*8), X6
#define MOVDDUP_48_XPTR_IIDX_8__X8 LONG $0x120F44F2; WORD $0xD644; BYTE $0x30 // MOVDDUP 48(SI)(DX*8), X8
#define ADDSUBPD_X2_X3 LONG $0xDAD00F66 // ADDSUBPD X2, X3
#define ADDSUBPD_X4_X5 LONG $0xECD00F66 // ADDSUBPD X4, X5
#define ADDSUBPD_X6_X7 LONG $0xFED00F66 // ADDSUBPD X6, X7
#define ADDSUBPD_X8_X9 LONG $0xD00F4566; BYTE $0xC8 // ADDSUBPD X8, X9
#define X_PTR SI
#define Y_PTR DI
#define LEN CX
#define TAIL BX
#define SUM X0
#define P_SUM X1
#define IDX AX
#define I_IDX DX
#define NEG1 X15
#define P_NEG1 X14
// func DotcUnitary(x, y []complex128) (sum complex128)
TEXT ·DotcUnitary(SB), NOSPLIT, $0
MOVQ x_base+0(FP), X_PTR // X_PTR = &x
MOVQ y_base+24(FP), Y_PTR // Y_PTR = &y
MOVQ x_len+8(FP), LEN // LEN = min( len(x), len(y) )
CMPQ y_len+32(FP), LEN
CMOVQLE y_len+32(FP), LEN
PXOR SUM, SUM // sum = 0
CMPQ LEN, $0 // if LEN == 0 { return }
JE dot_end
XORPS P_SUM, P_SUM // psum = 0
MOVSD $(-1.0), NEG1
SHUFPD $0, NEG1, NEG1 // { -1, -1 }
XORQ IDX, IDX // i := 0
MOVQ $1, I_IDX // j := 1
MOVQ LEN, TAIL
ANDQ $3, TAIL // TAIL = floor( TAIL / 4 )
SHRQ $2, LEN // LEN = TAIL % 4
JZ dot_tail // if LEN == 0 { goto dot_tail }
MOVAPS NEG1, P_NEG1 // Copy NEG1 to P_NEG1 for pipelining
dot_loop: // do {
MOVDDUP_XPTR_IDX_8__X3 // X_(i+1) = { real(x[i], real(x[i]) }
MOVDDUP_16_XPTR_IDX_8__X5
MOVDDUP_32_XPTR_IDX_8__X7
MOVDDUP_48_XPTR_IDX_8__X9
MOVDDUP_XPTR_IIDX_8__X2 // X_i = { imag(x[i]), imag(x[i]) }
MOVDDUP_16_XPTR_IIDX_8__X4
MOVDDUP_32_XPTR_IIDX_8__X6
MOVDDUP_48_XPTR_IIDX_8__X8
// X_i = { -imag(x[i]), -imag(x[i]) }
MULPD NEG1, X2
MULPD P_NEG1, X4
MULPD NEG1, X6
MULPD P_NEG1, X8
// X_j = { imag(y[i]), real(y[i]) }
MOVUPS (Y_PTR)(IDX*8), X10
MOVUPS 16(Y_PTR)(IDX*8), X11
MOVUPS 32(Y_PTR)(IDX*8), X12
MOVUPS 48(Y_PTR)(IDX*8), X13
// X_(i+1) = { imag(a) * real(x[i]), real(a) * real(x[i]) }
MULPD X10, X3
MULPD X11, X5
MULPD X12, X7
MULPD X13, X9
// X_j = { real(y[i]), imag(y[i]) }
SHUFPD $0x1, X10, X10
SHUFPD $0x1, X11, X11
SHUFPD $0x1, X12, X12
SHUFPD $0x1, X13, X13
// X_i = { real(a) * imag(x[i]), imag(a) * imag(x[i]) }
MULPD X10, X2
MULPD X11, X4
MULPD X12, X6
MULPD X13, X8
// X_(i+1) = {
// imag(result[i]): imag(a)*real(x[i]) + real(a)*imag(x[i]),
// real(result[i]): real(a)*real(x[i]) - imag(a)*imag(x[i])
// }
ADDSUBPD_X2_X3
ADDSUBPD_X4_X5
ADDSUBPD_X6_X7
ADDSUBPD_X8_X9
// psum += result[i]
ADDPD X3, SUM
ADDPD X5, P_SUM
ADDPD X7, SUM
ADDPD X9, P_SUM
ADDQ $8, IDX // IDX += 8
ADDQ $8, I_IDX // I_IDX += 8
DECQ LEN
JNZ dot_loop // } while --LEN > 0
ADDPD P_SUM, SUM // sum += psum
CMPQ TAIL, $0 // if TAIL == 0 { return }
JE dot_end
dot_tail: // do {
MOVDDUP_XPTR_IDX_8__X3 // X_(i+1) = { real(x[i]) , real(x[i]) }
MOVDDUP_XPTR_IIDX_8__X2 // X_i = { imag(x[i]) , imag(x[i]) }
MULPD NEG1, X2 // X_i = { -imag(x[i]) , -imag(x[i]) }
MOVUPS (Y_PTR)(IDX*8), X10 // X_j = { imag(y[i]) , real(y[i]) }
MULPD X10, X3 // X_(i+1) = { imag(a) * real(x[i]), real(a) * real(x[i]) }
SHUFPD $0x1, X10, X10 // X_j = { real(y[i]) , imag(y[i]) }
MULPD X10, X2 // X_i = { real(a) * imag(x[i]), imag(a) * imag(x[i]) }
// X_(i+1) = {
// imag(result[i]): imag(a)*real(x[i]) + real(a)*imag(x[i]),
// real(result[i]): real(a)*real(x[i]) - imag(a)*imag(x[i])
// }
ADDSUBPD_X2_X3
ADDPD X3, SUM // SUM += result[i]
ADDQ $2, IDX // IDX += 2
ADDQ $2, I_IDX // I_IDX += 2
DECQ TAIL
JNZ dot_tail // } while --TAIL > 0
dot_end:
MOVUPS SUM, sum+48(FP)
RET
@@ -0,0 +1,141 @@
// Copyright ©2016 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//+build !noasm,!appengine,!safe
#include "textflag.h"
#define MOVDDUP_XPTR__X3 LONG $0x1E120FF2 // MOVDDUP (SI), X3
#define MOVDDUP_XPTR_INCX__X5 LONG $0x120F42F2; WORD $0x062C // MOVDDUP (SI)(R8*1), X5
#define MOVDDUP_XPTR_INCX_2__X7 LONG $0x120F42F2; WORD $0x463C // MOVDDUP (SI)(R8*2), X7
#define MOVDDUP_XPTR_INCx3X__X9 LONG $0x120F46F2; WORD $0x0E0C // MOVDDUP (SI)(R9*1), X9
#define MOVDDUP_8_XPTR__X2 LONG $0x56120FF2; BYTE $0x08 // MOVDDUP 8(SI), X2
#define MOVDDUP_8_XPTR_INCX__X4 LONG $0x120F42F2; WORD $0x0664; BYTE $0x08 // MOVDDUP 8(SI)(R8*1), X4
#define MOVDDUP_8_XPTR_INCX_2__X6 LONG $0x120F42F2; WORD $0x4674; BYTE $0x08 // MOVDDUP 8(SI)(R8*2), X6
#define MOVDDUP_8_XPTR_INCx3X__X8 LONG $0x120F46F2; WORD $0x0E44; BYTE $0x08 // MOVDDUP 8(SI)(R9*1), X8
#define ADDSUBPD_X2_X3 LONG $0xDAD00F66 // ADDSUBPD X2, X3
#define ADDSUBPD_X4_X5 LONG $0xECD00F66 // ADDSUBPD X4, X5
#define ADDSUBPD_X6_X7 LONG $0xFED00F66 // ADDSUBPD X6, X7
#define ADDSUBPD_X8_X9 LONG $0xD00F4566; BYTE $0xC8 // ADDSUBPD X8, X9
#define X_PTR SI
#define Y_PTR DI
#define LEN CX
#define TAIL BX
#define SUM X0
#define P_SUM X1
#define INC_X R8
#define INCx3_X R9
#define INC_Y R10
#define INCx3_Y R11
// func DotuInc(x, y []complex128, n, incX, incY, ix, iy uintptr) (sum complex128)
TEXT ·DotuInc(SB), NOSPLIT, $0
MOVQ x_base+0(FP), X_PTR // X_PTR = &x
MOVQ y_base+24(FP), Y_PTR // Y_PTR = &y
MOVQ n+48(FP), LEN // LEN = n
PXOR SUM, SUM // sum = 0
CMPQ LEN, $0 // if LEN == 0 { return }
JE dot_end
MOVQ ix+72(FP), INC_X // INC_X = ix * sizeof(complex128)
SHLQ $4, INC_X
MOVQ iy+80(FP), INC_Y // INC_Y = iy * sizeof(complex128)
SHLQ $4, INC_Y
LEAQ (X_PTR)(INC_X*1), X_PTR // X_PTR = &(X_PTR[ix])
LEAQ (Y_PTR)(INC_Y*1), Y_PTR // Y_PTR = &(Y_PTR[iy])
MOVQ incX+56(FP), INC_X // INC_X = incX
SHLQ $4, INC_X // INC_X *= sizeof(complex128)
MOVQ incY+64(FP), INC_Y // INC_Y = incY
SHLQ $4, INC_Y // INC_Y *= sizeof(complex128)
MOVQ LEN, TAIL
ANDQ $3, TAIL // LEN = LEN % 4
SHRQ $2, LEN // LEN = floor( LEN / 4 )
JZ dot_tail // if LEN <= 4 { goto dot_tail }
PXOR P_SUM, P_SUM // psum = 0
LEAQ (INC_X)(INC_X*2), INCx3_X // INCx3_X = 3 * incX * sizeof(complex128)
LEAQ (INC_Y)(INC_Y*2), INCx3_Y // INCx3_Y = 3 * incY * sizeof(complex128)
dot_loop: // do {
MOVDDUP_XPTR__X3 // X_(i+1) = { real(x[i], real(x[i]) }
MOVDDUP_XPTR_INCX__X5
MOVDDUP_XPTR_INCX_2__X7
MOVDDUP_XPTR_INCx3X__X9
MOVDDUP_8_XPTR__X2 // X_i = { imag(x[i]), imag(x[i]) }
MOVDDUP_8_XPTR_INCX__X4
MOVDDUP_8_XPTR_INCX_2__X6
MOVDDUP_8_XPTR_INCx3X__X8
// X_j = { imag(y[i]), real(y[i]) }
MOVUPS (Y_PTR), X10
MOVUPS (Y_PTR)(INC_Y*1), X11
MOVUPS (Y_PTR)(INC_Y*2), X12
MOVUPS (Y_PTR)(INCx3_Y*1), X13
// X_(i+1) = { imag(a) * real(x[i]), real(a) * real(x[i]) }
MULPD X10, X3
MULPD X11, X5
MULPD X12, X7
MULPD X13, X9
// X_j = { real(y[i]), imag(y[i]) }
SHUFPD $0x1, X10, X10
SHUFPD $0x1, X11, X11
SHUFPD $0x1, X12, X12
SHUFPD $0x1, X13, X13
// X_i = { real(a) * imag(x[i]), imag(a) * imag(x[i]) }
MULPD X10, X2
MULPD X11, X4
MULPD X12, X6
MULPD X13, X8
// X_(i+1) = {
// imag(result[i]): imag(a)*real(x[i]) + real(a)*imag(x[i]),
// real(result[i]): real(a)*real(x[i]) - imag(a)*imag(x[i])
// }
ADDSUBPD_X2_X3
ADDSUBPD_X4_X5
ADDSUBPD_X6_X7
ADDSUBPD_X8_X9
// psum += result[i]
ADDPD X3, SUM
ADDPD X5, P_SUM
ADDPD X7, SUM
ADDPD X9, P_SUM
LEAQ (X_PTR)(INC_X*4), X_PTR // X_PTR = &(X_PTR[incX*4])
LEAQ (Y_PTR)(INC_Y*4), Y_PTR // Y_PTR = &(Y_PTR[incY*4])
DECQ LEN
JNZ dot_loop // } while --BX > 0
ADDPD P_SUM, SUM // sum += psum
CMPQ TAIL, $0 // if TAIL == 0 { return }
JE dot_end
dot_tail: // do {
MOVDDUP_XPTR__X3 // X_(i+1) = { real(x[i], real(x[i]) }
MOVDDUP_8_XPTR__X2 // X_i = { imag(x[i]), imag(x[i]) }
MOVUPS (Y_PTR), X10 // X_j = { imag(y[i]) , real(y[i]) }
MULPD X10, X3 // X_(i+1) = { imag(a) * real(x[i]), real(a) * real(x[i]) }
SHUFPD $0x1, X10, X10 // X_j = { real(y[i]) , imag(y[i]) }
MULPD X10, X2 // X_i = { real(a) * imag(x[i]), imag(a) * imag(x[i]) }
// X_(i+1) = {
// imag(result[i]): imag(a)*real(x[i]) + real(a)*imag(x[i]),
// real(result[i]): real(a)*real(x[i]) - imag(a)*imag(x[i])
// }
ADDSUBPD_X2_X3
ADDPD X3, SUM // sum += result[i]
ADDQ INC_X, X_PTR // X_PTR += incX
ADDQ INC_Y, Y_PTR // Y_PTR += incY
DECQ TAIL // --TAIL
JNZ dot_tail // } while TAIL > 0
dot_end:
MOVUPS SUM, sum+88(FP)
RET
@@ -0,0 +1,130 @@
// Copyright ©2016 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//+build !noasm,!appengine,!safe
#include "textflag.h"
#define MOVDDUP_XPTR_IDX_8__X3 LONG $0x1C120FF2; BYTE $0xC6 // MOVDDUP (SI)(AX*8), X3
#define MOVDDUP_16_XPTR_IDX_8__X5 LONG $0x6C120FF2; WORD $0x10C6 // MOVDDUP 16(SI)(AX*8), X5
#define MOVDDUP_32_XPTR_IDX_8__X7 LONG $0x7C120FF2; WORD $0x20C6 // MOVDDUP 32(SI)(AX*8), X7
#define MOVDDUP_48_XPTR_IDX_8__X9 LONG $0x120F44F2; WORD $0xC64C; BYTE $0x30 // MOVDDUP 48(SI)(AX*8), X9
#define MOVDDUP_XPTR_IIDX_8__X2 LONG $0x14120FF2; BYTE $0xD6 // MOVDDUP (SI)(DX*8), X2
#define MOVDDUP_16_XPTR_IIDX_8__X4 LONG $0x64120FF2; WORD $0x10D6 // MOVDDUP 16(SI)(DX*8), X4
#define MOVDDUP_32_XPTR_IIDX_8__X6 LONG $0x74120FF2; WORD $0x20D6 // MOVDDUP 32(SI)(DX*8), X6
#define MOVDDUP_48_XPTR_IIDX_8__X8 LONG $0x120F44F2; WORD $0xD644; BYTE $0x30 // MOVDDUP 48(SI)(DX*8), X8
#define ADDSUBPD_X2_X3 LONG $0xDAD00F66 // ADDSUBPD X2, X3
#define ADDSUBPD_X4_X5 LONG $0xECD00F66 // ADDSUBPD X4, X5
#define ADDSUBPD_X6_X7 LONG $0xFED00F66 // ADDSUBPD X6, X7
#define ADDSUBPD_X8_X9 LONG $0xD00F4566; BYTE $0xC8 // ADDSUBPD X8, X9
#define X_PTR SI
#define Y_PTR DI
#define LEN CX
#define TAIL BX
#define SUM X0
#define P_SUM X1
#define IDX AX
#define I_IDX DX
// func DotuUnitary(x, y []complex128) (sum complex128)
TEXT ·DotuUnitary(SB), NOSPLIT, $0
MOVQ x_base+0(FP), X_PTR // X_PTR = &x
MOVQ y_base+24(FP), Y_PTR // Y_PTR = &y
MOVQ x_len+8(FP), LEN // LEN = min( len(x), len(y) )
CMPQ y_len+32(FP), LEN
CMOVQLE y_len+32(FP), LEN
PXOR SUM, SUM // SUM = 0
CMPQ LEN, $0 // if LEN == 0 { return }
JE dot_end
PXOR P_SUM, P_SUM // P_SUM = 0
XORQ IDX, IDX // IDX = 0
MOVQ $1, DX // j = 1
MOVQ LEN, TAIL
ANDQ $3, TAIL // TAIL = floor( LEN / 4 )
SHRQ $2, LEN // LEN = LEN % 4
JZ dot_tail // if LEN == 0 { goto dot_tail }
dot_loop: // do {
MOVDDUP_XPTR_IDX_8__X3 // X_(i+1) = { real(x[i], real(x[i]) }
MOVDDUP_16_XPTR_IDX_8__X5
MOVDDUP_32_XPTR_IDX_8__X7
MOVDDUP_48_XPTR_IDX_8__X9
MOVDDUP_XPTR_IIDX_8__X2 // X_i = { imag(x[i]), imag(x[i]) }
MOVDDUP_16_XPTR_IIDX_8__X4
MOVDDUP_32_XPTR_IIDX_8__X6
MOVDDUP_48_XPTR_IIDX_8__X8
// X_j = { imag(y[i]), real(y[i]) }
MOVUPS (Y_PTR)(IDX*8), X10
MOVUPS 16(Y_PTR)(IDX*8), X11
MOVUPS 32(Y_PTR)(IDX*8), X12
MOVUPS 48(Y_PTR)(IDX*8), X13
// X_(i+1) = { imag(a) * real(x[i]), real(a) * real(x[i]) }
MULPD X10, X3
MULPD X11, X5
MULPD X12, X7
MULPD X13, X9
// X_j = { real(y[i]), imag(y[i]) }
SHUFPD $0x1, X10, X10
SHUFPD $0x1, X11, X11
SHUFPD $0x1, X12, X12
SHUFPD $0x1, X13, X13
// X_i = { real(a) * imag(x[i]), imag(a) * imag(x[i]) }
MULPD X10, X2
MULPD X11, X4
MULPD X12, X6
MULPD X13, X8
// X_(i+1) = {
// imag(result[i]): imag(a)*real(x[i]) + real(a)*imag(x[i]),
// real(result[i]): real(a)*real(x[i]) - imag(a)*imag(x[i])
// }
ADDSUBPD_X2_X3
ADDSUBPD_X4_X5
ADDSUBPD_X6_X7
ADDSUBPD_X8_X9
// psum += result[i]
ADDPD X3, SUM
ADDPD X5, P_SUM
ADDPD X7, SUM
ADDPD X9, P_SUM
ADDQ $8, IDX // IDX += 8
ADDQ $8, I_IDX // I_IDX += 8
DECQ LEN
JNZ dot_loop // } while --LEN > 0
ADDPD P_SUM, SUM // SUM += P_SUM
CMPQ TAIL, $0 // if TAIL == 0 { return }
JE dot_end
dot_tail: // do {
MOVDDUP_XPTR_IDX_8__X3 // X_(i+1) = { real(x[i] , real(x[i]) }
MOVDDUP_XPTR_IIDX_8__X2 // X_i = { imag(x[i]) , imag(x[i]) }
MOVUPS (Y_PTR)(IDX*8), X10 // X_j = { imag(y[i]) , real(y[i]) }
MULPD X10, X3 // X_(i+1) = { imag(a) * real(x[i]), real(a) * real(x[i]) }
SHUFPD $0x1, X10, X10 // X_j = { real(y[i]) , imag(y[i]) }
MULPD X10, X2 // X_i = { real(a) * imag(x[i]), imag(a) * imag(x[i]) }
// X_(i+1) = {
// imag(result[i]): imag(a)*real(x[i]) + real(a)*imag(x[i]),
// real(result[i]): real(a)*real(x[i]) - imag(a)*imag(x[i])
// }
ADDSUBPD_X2_X3
ADDPD X3, SUM // psum += result[i]
ADDQ $2, IDX // IDX += 2
ADDQ $2, I_IDX // I_IDX += 2
DECQ TAIL // --TAIL
JNZ dot_tail // } while TAIL > 0
dot_end:
MOVUPS SUM, sum+48(FP)
RET
@@ -0,0 +1,69 @@
// Copyright ©2017 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//+build !noasm,!appengine,!safe
#include "textflag.h"
#define SRC SI
#define DST SI
#define LEN CX
#define TAIL BX
#define INC R9
#define INC3 R10
#define ALPHA X0
#define ALPHA_2 X1
#define MOVDDUP_ALPHA LONG $0x44120FF2; WORD $0x0824 // MOVDDUP 8(SP), X0
// func DscalInc(alpha float64, x []complex128, n, inc uintptr)
TEXT ·DscalInc(SB), NOSPLIT, $0
MOVQ x_base+8(FP), SRC // SRC = &x
MOVQ n+32(FP), LEN // LEN = n
CMPQ LEN, $0 // if LEN == 0 { return }
JE dscal_end
MOVDDUP_ALPHA // ALPHA = alpha
MOVQ inc+40(FP), INC // INC = inc
SHLQ $4, INC // INC = INC * sizeof(complex128)
LEAQ (INC)(INC*2), INC3 // INC3 = 3 * INC
MOVUPS ALPHA, ALPHA_2 // Copy ALPHA and ALPHA_2 for pipelining
MOVQ LEN, TAIL // TAIL = LEN
SHRQ $2, LEN // LEN = floor( n / 4 )
JZ dscal_tail // if LEN == 0 { goto dscal_tail }
dscal_loop: // do {
MOVUPS (SRC), X2 // X_i = x[i]
MOVUPS (SRC)(INC*1), X3
MOVUPS (SRC)(INC*2), X4
MOVUPS (SRC)(INC3*1), X5
MULPD ALPHA, X2 // X_i *= ALPHA
MULPD ALPHA_2, X3
MULPD ALPHA, X4
MULPD ALPHA_2, X5
MOVUPS X2, (DST) // x[i] = X_i
MOVUPS X3, (DST)(INC*1)
MOVUPS X4, (DST)(INC*2)
MOVUPS X5, (DST)(INC3*1)
LEAQ (SRC)(INC*4), SRC // SRC += INC*4
DECQ LEN
JNZ dscal_loop // } while --LEN > 0
dscal_tail:
ANDQ $3, TAIL // TAIL = TAIL % 4
JE dscal_end // if TAIL == 0 { return }
dscal_tail_loop: // do {
MOVUPS (SRC), X2 // X_i = x[i]
MULPD ALPHA, X2 // X_i *= ALPHA
MOVUPS X2, (DST) // x[i] = X_i
ADDQ INC, SRC // SRC += INC
DECQ TAIL
JNZ dscal_tail_loop // } while --TAIL > 0
dscal_end:
RET
@@ -0,0 +1,66 @@
// Copyright ©2017 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//+build !noasm,!appengine,!safe
#include "textflag.h"
#define SRC SI
#define DST SI
#define LEN CX
#define IDX AX
#define TAIL BX
#define ALPHA X0
#define ALPHA_2 X1
#define MOVDDUP_ALPHA LONG $0x44120FF2; WORD $0x0824 // MOVDDUP 8(SP), X0
// func DscalUnitary(alpha float64, x []complex128)
TEXT ·DscalUnitary(SB), NOSPLIT, $0
MOVQ x_base+8(FP), SRC // SRC = &x
MOVQ x_len+16(FP), LEN // LEN = len(x)
CMPQ LEN, $0 // if LEN == 0 { return }
JE dscal_end
MOVDDUP_ALPHA // ALPHA = alpha
XORQ IDX, IDX // IDX = 0
MOVUPS ALPHA, ALPHA_2 // Copy ALPHA to ALPHA_2 for pipelining
MOVQ LEN, TAIL // TAIL = LEN
SHRQ $2, LEN // LEN = floor( n / 4 )
JZ dscal_tail // if LEN == 0 { goto dscal_tail }
dscal_loop: // do {
MOVUPS (SRC)(IDX*8), X2 // X_i = x[i]
MOVUPS 16(SRC)(IDX*8), X3
MOVUPS 32(SRC)(IDX*8), X4
MOVUPS 48(SRC)(IDX*8), X5
MULPD ALPHA, X2 // X_i *= ALPHA
MULPD ALPHA_2, X3
MULPD ALPHA, X4
MULPD ALPHA_2, X5
MOVUPS X2, (DST)(IDX*8) // x[i] = X_i
MOVUPS X3, 16(DST)(IDX*8)
MOVUPS X4, 32(DST)(IDX*8)
MOVUPS X5, 48(DST)(IDX*8)
ADDQ $8, IDX // IDX += 8
DECQ LEN
JNZ dscal_loop // } while --LEN > 0
dscal_tail:
ANDQ $3, TAIL // TAIL = TAIL % 4
JZ dscal_end // if TAIL == 0 { return }
dscal_tail_loop: // do {
MOVUPS (SRC)(IDX*8), X2 // X_i = x[i]
MULPD ALPHA, X2 // X_i *= ALPHA
MOVUPS X2, (DST)(IDX*8) // x[i] = X_i
ADDQ $2, IDX // IDX += 2
DECQ TAIL
JNZ dscal_tail_loop // } while --TAIL > 0
dscal_end:
RET
@@ -0,0 +1,31 @@
// Copyright ©2016 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package c128
// ScalUnitaryTo is
// for i, v := range x {
// dst[i] = alpha * v
// }
func ScalUnitaryTo(dst []complex128, alpha complex128, x []complex128) {
for i, v := range x {
dst[i] = alpha * v
}
}
// ScalIncTo is
// var idst, ix uintptr
// for i := 0; i < int(n); i++ {
// dst[idst] = alpha * x[ix]
// ix += incX
// idst += incDst
// }
func ScalIncTo(dst []complex128, incDst uintptr, alpha complex128, x []complex128, n, incX uintptr) {
var idst, ix uintptr
for i := 0; i < int(n); i++ {
dst[idst] = alpha * x[ix]
ix += incX
idst += incDst
}
}
@@ -0,0 +1,116 @@
// Copyright ©2017 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//+build !noasm,!appengine,!safe
#include "textflag.h"
#define SRC SI
#define DST SI
#define LEN CX
#define IDX AX
#define TAIL BX
#define ALPHA X0
#define ALPHA_C X1
#define ALPHA2 X10
#define ALPHA_C2 X11
#define MOVDDUP_X2_X3 LONG $0xDA120FF2 // MOVDDUP X2, X3
#define MOVDDUP_X4_X5 LONG $0xEC120FF2 // MOVDDUP X4, X5
#define MOVDDUP_X6_X7 LONG $0xFE120FF2 // MOVDDUP X6, X7
#define MOVDDUP_X8_X9 LONG $0x120F45F2; BYTE $0xC8 // MOVDDUP X8, X9
#define ADDSUBPD_X2_X3 LONG $0xDAD00F66 // ADDSUBPD X2, X3
#define ADDSUBPD_X4_X5 LONG $0xECD00F66 // ADDSUBPD X4, X5
#define ADDSUBPD_X6_X7 LONG $0xFED00F66 // ADDSUBPD X6, X7
#define ADDSUBPD_X8_X9 LONG $0xD00F4566; BYTE $0xC8 // ADDSUBPD X8, X9
// func ScalUnitary(alpha complex128, x []complex128)
TEXT ·ScalUnitary(SB), NOSPLIT, $0
MOVQ x_base+16(FP), SRC // SRC = &x
MOVQ x_len+24(FP), LEN // LEN = len(x)
CMPQ LEN, $0 // if LEN == 0 { return }
JE scal_end
MOVUPS alpha+0(FP), ALPHA // ALPHA = { imag(alpha), real(alpha) }
MOVAPS ALPHA, ALPHA_C
SHUFPD $0x1, ALPHA_C, ALPHA_C // ALPHA_C = { real(alpha), imag(alpha) }
XORQ IDX, IDX // IDX = 0
MOVAPS ALPHA, ALPHA2 // Copy ALPHA and ALPHA_C for pipelining
MOVAPS ALPHA_C, ALPHA_C2
MOVQ LEN, TAIL
SHRQ $2, LEN // LEN = floor( n / 4 )
JZ scal_tail // if BX == 0 { goto scal_tail }
scal_loop: // do {
MOVUPS (SRC)(IDX*8), X2 // X_i = { imag(x[i]), real(x[i]) }
MOVUPS 16(SRC)(IDX*8), X4
MOVUPS 32(SRC)(IDX*8), X6
MOVUPS 48(SRC)(IDX*8), X8
// X_(i+1) = { real(x[i], real(x[i]) }
MOVDDUP_X2_X3
MOVDDUP_X4_X5
MOVDDUP_X6_X7
MOVDDUP_X8_X9
// X_i = { imag(x[i]), imag(x[i]) }
SHUFPD $0x3, X2, X2
SHUFPD $0x3, X4, X4
SHUFPD $0x3, X6, X6
SHUFPD $0x3, X8, X8
// X_i = { real(ALPHA) * imag(x[i]), imag(ALPHA) * imag(x[i]) }
// X_(i+1) = { imag(ALPHA) * real(x[i]), real(ALPHA) * real(x[i]) }
MULPD ALPHA_C, X2
MULPD ALPHA, X3
MULPD ALPHA_C2, X4
MULPD ALPHA2, X5
MULPD ALPHA_C, X6
MULPD ALPHA, X7
MULPD ALPHA_C2, X8
MULPD ALPHA2, X9
// X_(i+1) = {
// imag(result[i]): imag(ALPHA)*real(x[i]) + real(ALPHA)*imag(x[i]),
// real(result[i]): real(ALPHA)*real(x[i]) - imag(ALPHA)*imag(x[i])
// }
ADDSUBPD_X2_X3
ADDSUBPD_X4_X5
ADDSUBPD_X6_X7
ADDSUBPD_X8_X9
MOVUPS X3, (DST)(IDX*8) // x[i] = X_(i+1)
MOVUPS X5, 16(DST)(IDX*8)
MOVUPS X7, 32(DST)(IDX*8)
MOVUPS X9, 48(DST)(IDX*8)
ADDQ $8, IDX // IDX += 8
DECQ LEN
JNZ scal_loop // } while --LEN > 0
scal_tail:
ANDQ $3, TAIL // TAIL = TAIL % 4
JZ scal_end // if TAIL == 0 { return }
scal_tail_loop: // do {
MOVUPS (SRC)(IDX*8), X2 // X_i = { imag(x[i]), real(x[i]) }
MOVDDUP_X2_X3 // X_(i+1) = { real(x[i], real(x[i]) }
SHUFPD $0x3, X2, X2 // X_i = { imag(x[i]), imag(x[i]) }
MULPD ALPHA_C, X2 // X_i = { real(ALPHA) * imag(x[i]), imag(ALPHA) * imag(x[i]) }
MULPD ALPHA, X3 // X_(i+1) = { imag(ALPHA) * real(x[i]), real(ALPHA) * real(x[i]) }
// X_(i+1) = {
// imag(result[i]): imag(ALPHA)*real(x[i]) + real(ALPHA)*imag(x[i]),
// real(result[i]): real(ALPHA)*real(x[i]) - imag(ALPHA)*imag(x[i])
// }
ADDSUBPD_X2_X3
MOVUPS X3, (DST)(IDX*8) // x[i] = X_(i+1)
ADDQ $2, IDX // IDX += 2
DECQ TAIL
JNZ scal_tail_loop // } while --LEN > 0
scal_end:
RET
@@ -0,0 +1,121 @@
// Copyright ©2016 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//+build !noasm,!appengine,!safe
#include "textflag.h"
#define SRC SI
#define DST SI
#define LEN CX
#define TAIL BX
#define INC R9
#define INC3 R10
#define ALPHA X0
#define ALPHA_C X1
#define ALPHA2 X10
#define ALPHA_C2 X11
#define MOVDDUP_X2_X3 LONG $0xDA120FF2 // MOVDDUP X2, X3
#define MOVDDUP_X4_X5 LONG $0xEC120FF2 // MOVDDUP X4, X5
#define MOVDDUP_X6_X7 LONG $0xFE120FF2 // MOVDDUP X6, X7
#define MOVDDUP_X8_X9 LONG $0x120F45F2; BYTE $0xC8 // MOVDDUP X8, X9
#define ADDSUBPD_X2_X3 LONG $0xDAD00F66 // ADDSUBPD X2, X3
#define ADDSUBPD_X4_X5 LONG $0xECD00F66 // ADDSUBPD X4, X5
#define ADDSUBPD_X6_X7 LONG $0xFED00F66 // ADDSUBPD X6, X7
#define ADDSUBPD_X8_X9 LONG $0xD00F4566; BYTE $0xC8 // ADDSUBPD X8, X9
// func ScalInc(alpha complex128, x []complex128, n, inc uintptr)
TEXT ·ScalInc(SB), NOSPLIT, $0
MOVQ x_base+16(FP), SRC // SRC = &x
MOVQ n+40(FP), LEN // LEN = len(x)
CMPQ LEN, $0
JE scal_end // if LEN == 0 { return }
MOVQ inc+48(FP), INC // INC = inc
SHLQ $4, INC // INC = INC * sizeof(complex128)
LEAQ (INC)(INC*2), INC3 // INC3 = 3 * INC
MOVUPS alpha+0(FP), ALPHA // ALPHA = { imag(alpha), real(alpha) }
MOVAPS ALPHA, ALPHA_C
SHUFPD $0x1, ALPHA_C, ALPHA_C // ALPHA_C = { real(alpha), imag(alpha) }
MOVAPS ALPHA, ALPHA2 // Copy ALPHA and ALPHA_C for pipelining
MOVAPS ALPHA_C, ALPHA_C2
MOVQ LEN, TAIL
SHRQ $2, LEN // LEN = floor( n / 4 )
JZ scal_tail // if BX == 0 { goto scal_tail }
scal_loop: // do {
MOVUPS (SRC), X2 // X_i = { imag(x[i]), real(x[i]) }
MOVUPS (SRC)(INC*1), X4
MOVUPS (SRC)(INC*2), X6
MOVUPS (SRC)(INC3*1), X8
// X_(i+1) = { real(x[i], real(x[i]) }
MOVDDUP_X2_X3
MOVDDUP_X4_X5
MOVDDUP_X6_X7
MOVDDUP_X8_X9
// X_i = { imag(x[i]), imag(x[i]) }
SHUFPD $0x3, X2, X2
SHUFPD $0x3, X4, X4
SHUFPD $0x3, X6, X6
SHUFPD $0x3, X8, X8
// X_i = { real(ALPHA) * imag(x[i]), imag(ALPHA) * imag(x[i]) }
// X_(i+1) = { imag(ALPHA) * real(x[i]), real(ALPHA) * real(x[i]) }
MULPD ALPHA_C, X2
MULPD ALPHA, X3
MULPD ALPHA_C2, X4
MULPD ALPHA2, X5
MULPD ALPHA_C, X6
MULPD ALPHA, X7
MULPD ALPHA_C2, X8
MULPD ALPHA2, X9
// X_(i+1) = {
// imag(result[i]): imag(ALPHA)*real(x[i]) + real(ALPHA)*imag(x[i]),
// real(result[i]): real(ALPHA)*real(x[i]) - imag(ALPHA)*imag(x[i])
// }
ADDSUBPD_X2_X3
ADDSUBPD_X4_X5
ADDSUBPD_X6_X7
ADDSUBPD_X8_X9
MOVUPS X3, (DST) // x[i] = X_(i+1)
MOVUPS X5, (DST)(INC*1)
MOVUPS X7, (DST)(INC*2)
MOVUPS X9, (DST)(INC3*1)
LEAQ (SRC)(INC*4), SRC // SRC = &(SRC[inc*4])
DECQ LEN
JNZ scal_loop // } while --BX > 0
scal_tail:
ANDQ $3, TAIL // TAIL = TAIL % 4
JE scal_end // if TAIL == 0 { return }
scal_tail_loop: // do {
MOVUPS (SRC), X2 // X_i = { imag(x[i]), real(x[i]) }
MOVDDUP_X2_X3 // X_(i+1) = { real(x[i], real(x[i]) }
SHUFPD $0x3, X2, X2 // X_i = { imag(x[i]), imag(x[i]) }
MULPD ALPHA_C, X2 // X_i = { real(ALPHA) * imag(x[i]), imag(ALPHA) * imag(x[i]) }
MULPD ALPHA, X3 // X_(i+1) = { imag(ALPHA) * real(x[i]), real(ALPHA) * real(x[i]) }
// X_(i+1) = {
// imag(result[i]): imag(ALPHA)*real(x[i]) + real(ALPHA)*imag(x[i]),
// real(result[i]): real(ALPHA)*real(x[i]) - imag(ALPHA)*imag(x[i])
// }
ADDSUBPD_X2_X3
MOVUPS X3, (DST) // x[i] = X_i
ADDQ INC, SRC // SRC = &(SRC[incX])
DECQ TAIL
JNZ scal_tail_loop // } while --TAIL > 0
scal_end:
RET
@@ -0,0 +1,96 @@
// Copyright ©2016 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build !noasm,!appengine,!safe
package c128
// AxpyUnitary is
// for i, v := range x {
// y[i] += alpha * v
// }
func AxpyUnitary(alpha complex128, x, y []complex128)
// AxpyUnitaryTo is
// for i, v := range x {
// dst[i] = alpha*v + y[i]
// }
func AxpyUnitaryTo(dst []complex128, alpha complex128, x, y []complex128)
// AxpyInc is
// for i := 0; i < int(n); i++ {
// y[iy] += alpha * x[ix]
// ix += incX
// iy += incY
// }
func AxpyInc(alpha complex128, x, y []complex128, n, incX, incY, ix, iy uintptr)
// AxpyIncTo is
// for i := 0; i < int(n); i++ {
// dst[idst] = alpha*x[ix] + y[iy]
// ix += incX
// iy += incY
// idst += incDst
// }
func AxpyIncTo(dst []complex128, incDst, idst uintptr, alpha complex128, x, y []complex128, n, incX, incY, ix, iy uintptr)
// DscalUnitary is
// for i, v := range x {
// x[i] = complex(real(v)*alpha, imag(v)*alpha)
// }
func DscalUnitary(alpha float64, x []complex128)
// DscalInc is
// var ix uintptr
// for i := 0; i < int(n); i++ {
// x[ix] = complex(real(x[ix])*alpha, imag(x[ix])*alpha)
// ix += inc
// }
func DscalInc(alpha float64, x []complex128, n, inc uintptr)
// ScalInc is
// var ix uintptr
// for i := 0; i < int(n); i++ {
// x[ix] *= alpha
// ix += incX
// }
func ScalInc(alpha complex128, x []complex128, n, inc uintptr)
// ScalUnitary is
// for i := range x {
// x[i] *= alpha
// }
func ScalUnitary(alpha complex128, x []complex128)
// DotcUnitary is
// for i, v := range x {
// sum += y[i] * cmplx.Conj(v)
// }
// return sum
func DotcUnitary(x, y []complex128) (sum complex128)
// DotcInc is
// for i := 0; i < int(n); i++ {
// sum += y[iy] * cmplx.Conj(x[ix])
// ix += incX
// iy += incY
// }
// return sum
func DotcInc(x, y []complex128, n, incX, incY, ix, iy uintptr) (sum complex128)
// DotuUnitary is
// for i, v := range x {
// sum += y[i] * v
// }
// return sum
func DotuUnitary(x, y []complex128) (sum complex128)
// DotuInc is
// for i := 0; i < int(n); i++ {
// sum += y[iy] * x[ix]
// ix += incX
// iy += incY
// }
// return sum
func DotuInc(x, y []complex128, n, incX, incY, ix, iy uintptr) (sum complex128)
@@ -0,0 +1,163 @@
// Copyright ©2016 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build !amd64 noasm appengine safe
package c128
import "math/cmplx"
// AxpyUnitary is
// for i, v := range x {
// y[i] += alpha * v
// }
func AxpyUnitary(alpha complex128, x, y []complex128) {
for i, v := range x {
y[i] += alpha * v
}
}
// AxpyUnitaryTo is
// for i, v := range x {
// dst[i] = alpha*v + y[i]
// }
func AxpyUnitaryTo(dst []complex128, alpha complex128, x, y []complex128) {
for i, v := range x {
dst[i] = alpha*v + y[i]
}
}
// AxpyInc is
// for i := 0; i < int(n); i++ {
// y[iy] += alpha * x[ix]
// ix += incX
// iy += incY
// }
func AxpyInc(alpha complex128, x, y []complex128, n, incX, incY, ix, iy uintptr) {
for i := 0; i < int(n); i++ {
y[iy] += alpha * x[ix]
ix += incX
iy += incY
}
}
// AxpyIncTo is
// for i := 0; i < int(n); i++ {
// dst[idst] = alpha*x[ix] + y[iy]
// ix += incX
// iy += incY
// idst += incDst
// }
func AxpyIncTo(dst []complex128, incDst, idst uintptr, alpha complex128, x, y []complex128, n, incX, incY, ix, iy uintptr) {
for i := 0; i < int(n); i++ {
dst[idst] = alpha*x[ix] + y[iy]
ix += incX
iy += incY
idst += incDst
}
}
// DscalUnitary is
// for i, v := range x {
// x[i] = complex(real(v)*alpha, imag(v)*alpha)
// }
func DscalUnitary(alpha float64, x []complex128) {
for i, v := range x {
x[i] = complex(real(v)*alpha, imag(v)*alpha)
}
}
// DscalInc is
// var ix uintptr
// for i := 0; i < int(n); i++ {
// x[ix] = complex(real(x[ix])*alpha, imag(x[ix])*alpha)
// ix += inc
// }
func DscalInc(alpha float64, x []complex128, n, inc uintptr) {
var ix uintptr
for i := 0; i < int(n); i++ {
x[ix] = complex(real(x[ix])*alpha, imag(x[ix])*alpha)
ix += inc
}
}
// ScalInc is
// var ix uintptr
// for i := 0; i < int(n); i++ {
// x[ix] *= alpha
// ix += incX
// }
func ScalInc(alpha complex128, x []complex128, n, inc uintptr) {
var ix uintptr
for i := 0; i < int(n); i++ {
x[ix] *= alpha
ix += inc
}
}
// ScalUnitary is
// for i := range x {
// x[i] *= alpha
// }
func ScalUnitary(alpha complex128, x []complex128) {
for i := range x {
x[i] *= alpha
}
}
// DotcUnitary is
// for i, v := range x {
// sum += y[i] * cmplx.Conj(v)
// }
// return sum
func DotcUnitary(x, y []complex128) (sum complex128) {
for i, v := range x {
sum += y[i] * cmplx.Conj(v)
}
return sum
}
// DotcInc is
// for i := 0; i < int(n); i++ {
// sum += y[iy] * cmplx.Conj(x[ix])
// ix += incX
// iy += incY
// }
// return sum
func DotcInc(x, y []complex128, n, incX, incY, ix, iy uintptr) (sum complex128) {
for i := 0; i < int(n); i++ {
sum += y[iy] * cmplx.Conj(x[ix])
ix += incX
iy += incY
}
return sum
}
// DotuUnitary is
// for i, v := range x {
// sum += y[i] * v
// }
// return sum
func DotuUnitary(x, y []complex128) (sum complex128) {
for i, v := range x {
sum += y[i] * v
}
return sum
}
// DotuInc is
// for i := 0; i < int(n); i++ {
// sum += y[iy] * x[ix]
// ix += incX
// iy += incY
// }
// return sum
func DotuInc(x, y []complex128, n, incX, incY, ix, iy uintptr) (sum complex128) {
for i := 0; i < int(n); i++ {
sum += y[iy] * x[ix]
ix += incX
iy += incY
}
return sum
}
@@ -0,0 +1,73 @@
// Copyright ©2016 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//+build !noasm,!appengine,!safe
#include "textflag.h"
// func AxpyInc(alpha float32, x, y []float32, n, incX, incY, ix, iy uintptr)
TEXT ·AxpyInc(SB), NOSPLIT, $0
MOVQ n+56(FP), CX // CX = n
CMPQ CX, $0 // if n==0 { return }
JLE axpyi_end
MOVQ x_base+8(FP), SI // SI = &x
MOVQ y_base+32(FP), DI // DI = &y
MOVQ ix+80(FP), R8 // R8 = ix
MOVQ iy+88(FP), R9 // R9 = iy
LEAQ (SI)(R8*4), SI // SI = &(x[ix])
LEAQ (DI)(R9*4), DI // DI = &(y[iy])
MOVQ DI, DX // DX = DI Read Pointer for y
MOVQ incX+64(FP), R8 // R8 = incX
SHLQ $2, R8 // R8 *= sizeof(float32)
MOVQ incY+72(FP), R9 // R9 = incY
SHLQ $2, R9 // R9 *= sizeof(float32)
MOVSS alpha+0(FP), X0 // X0 = alpha
MOVSS X0, X1 // X1 = X0 // for pipelining
MOVQ CX, BX
ANDQ $3, BX // BX = n % 4
SHRQ $2, CX // CX = floor( n / 4 )
JZ axpyi_tail_start // if CX == 0 { goto axpyi_tail_start }
axpyi_loop: // Loop unrolled 4x do {
MOVSS (SI), X2 // X_i = x[i]
MOVSS (SI)(R8*1), X3
LEAQ (SI)(R8*2), SI // SI = &(SI[incX*2])
MOVSS (SI), X4
MOVSS (SI)(R8*1), X5
MULSS X1, X2 // X_i *= a
MULSS X0, X3
MULSS X1, X4
MULSS X0, X5
ADDSS (DX), X2 // X_i += y[i]
ADDSS (DX)(R9*1), X3
LEAQ (DX)(R9*2), DX // DX = &(DX[incY*2])
ADDSS (DX), X4
ADDSS (DX)(R9*1), X5
MOVSS X2, (DI) // y[i] = X_i
MOVSS X3, (DI)(R9*1)
LEAQ (DI)(R9*2), DI // DI = &(DI[incY*2])
MOVSS X4, (DI)
MOVSS X5, (DI)(R9*1)
LEAQ (SI)(R8*2), SI // SI = &(SI[incX*2]) // Increment addresses
LEAQ (DX)(R9*2), DX // DX = &(DX[incY*2])
LEAQ (DI)(R9*2), DI // DI = &(DI[incY*2])
LOOP axpyi_loop // } while --CX > 0
CMPQ BX, $0 // if BX == 0 { return }
JE axpyi_end
axpyi_tail_start: // Reset loop registers
MOVQ BX, CX // Loop counter: CX = BX
axpyi_tail: // do {
MOVSS (SI), X2 // X2 = x[i]
MULSS X1, X2 // X2 *= a
ADDSS (DI), X2 // X2 += y[i]
MOVSS X2, (DI) // y[i] = X2
ADDQ R8, SI // SI = &(SI[incX])
ADDQ R9, DI // DI = &(DI[incY])
LOOP axpyi_tail // } while --CX > 0
axpyi_end:
RET
@@ -0,0 +1,78 @@
// Copyright ©2016 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//+build !noasm,!appengine,!safe
#include "textflag.h"
// func AxpyIncTo(dst []float32, incDst, idst uintptr, alpha float32, x, y []float32, n, incX, incY, ix, iy uintptr)
TEXT ·AxpyIncTo(SB), NOSPLIT, $0
MOVQ n+96(FP), CX // CX = n
CMPQ CX, $0 // if n==0 { return }
JLE axpyi_end
MOVQ dst_base+0(FP), DI // DI = &dst
MOVQ x_base+48(FP), SI // SI = &x
MOVQ y_base+72(FP), DX // DX = &y
MOVQ ix+120(FP), R8 // R8 = ix // Load the first index
MOVQ iy+128(FP), R9 // R9 = iy
MOVQ idst+32(FP), R10 // R10 = idst
LEAQ (SI)(R8*4), SI // SI = &(x[ix])
LEAQ (DX)(R9*4), DX // DX = &(y[iy])
LEAQ (DI)(R10*4), DI // DI = &(dst[idst])
MOVQ incX+104(FP), R8 // R8 = incX
SHLQ $2, R8 // R8 *= sizeof(float32)
MOVQ incY+112(FP), R9 // R9 = incY
SHLQ $2, R9 // R9 *= sizeof(float32)
MOVQ incDst+24(FP), R10 // R10 = incDst
SHLQ $2, R10 // R10 *= sizeof(float32)
MOVSS alpha+40(FP), X0 // X0 = alpha
MOVSS X0, X1 // X1 = X0 // for pipelining
MOVQ CX, BX
ANDQ $3, BX // BX = n % 4
SHRQ $2, CX // CX = floor( n / 4 )
JZ axpyi_tail_start // if CX == 0 { goto axpyi_tail_start }
axpyi_loop: // Loop unrolled 4x do {
MOVSS (SI), X2 // X_i = x[i]
MOVSS (SI)(R8*1), X3
LEAQ (SI)(R8*2), SI // SI = &(SI[incX*2])
MOVSS (SI), X4
MOVSS (SI)(R8*1), X5
MULSS X1, X2 // X_i *= a
MULSS X0, X3
MULSS X1, X4
MULSS X0, X5
ADDSS (DX), X2 // X_i += y[i]
ADDSS (DX)(R9*1), X3
LEAQ (DX)(R9*2), DX // DX = &(DX[incY*2])
ADDSS (DX), X4
ADDSS (DX)(R9*1), X5
MOVSS X2, (DI) // dst[i] = X_i
MOVSS X3, (DI)(R10*1)
LEAQ (DI)(R10*2), DI // DI = &(DI[incDst*2])
MOVSS X4, (DI)
MOVSS X5, (DI)(R10*1)
LEAQ (SI)(R8*2), SI // SI = &(SI[incX*2]) // Increment addresses
LEAQ (DX)(R9*2), DX // DX = &(DX[incY*2])
LEAQ (DI)(R10*2), DI // DI = &(DI[incDst*2])
LOOP axpyi_loop // } while --CX > 0
CMPQ BX, $0 // if BX == 0 { return }
JE axpyi_end
axpyi_tail_start: // Reset loop registers
MOVQ BX, CX // Loop counter: CX = BX
axpyi_tail: // do {
MOVSS (SI), X2 // X2 = x[i]
MULSS X1, X2 // X2 *= a
ADDSS (DX), X2 // X2 += y[i]
MOVSS X2, (DI) // dst[i] = X2
ADDQ R8, SI // SI = &(SI[incX])
ADDQ R9, DX // DX = &(DX[incY])
ADDQ R10, DI // DI = &(DI[incY])
LOOP axpyi_tail // } while --CX > 0
axpyi_end:
RET
@@ -0,0 +1,97 @@
// Copyright ©2016 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//+build !noasm,!appengine,!safe
#include "textflag.h"
// func AxpyUnitary(alpha float32, x, y []float32)
TEXT ·AxpyUnitary(SB), NOSPLIT, $0
MOVQ x_base+8(FP), SI // SI = &x
MOVQ y_base+32(FP), DI // DI = &y
MOVQ x_len+16(FP), BX // BX = min( len(x), len(y) )
CMPQ y_len+40(FP), BX
CMOVQLE y_len+40(FP), BX
CMPQ BX, $0 // if BX == 0 { return }
JE axpy_end
MOVSS alpha+0(FP), X0
SHUFPS $0, X0, X0 // X0 = { a, a, a, a }
XORQ AX, AX // i = 0
PXOR X2, X2 // 2 NOP instructions (PXOR) to align
PXOR X3, X3 // loop to cache line
MOVQ DI, CX
ANDQ $0xF, CX // Align on 16-byte boundary for ADDPS
JZ axpy_no_trim // if CX == 0 { goto axpy_no_trim }
XORQ $0xF, CX // CX = 4 - floor( BX % 16 / 4 )
INCQ CX
SHRQ $2, CX
axpy_align: // Trim first value(s) in unaligned buffer do {
MOVSS (SI)(AX*4), X2 // X2 = x[i]
MULSS X0, X2 // X2 *= a
ADDSS (DI)(AX*4), X2 // X2 += y[i]
MOVSS X2, (DI)(AX*4) // y[i] = X2
INCQ AX // i++
DECQ BX
JZ axpy_end // if --BX == 0 { return }
LOOP axpy_align // } while --CX > 0
axpy_no_trim:
MOVUPS X0, X1 // Copy X0 to X1 for pipelining
MOVQ BX, CX
ANDQ $0xF, BX // BX = len % 16
SHRQ $4, CX // CX = int( len / 16 )
JZ axpy_tail4_start // if CX == 0 { return }
axpy_loop: // Loop unrolled 16x do {
MOVUPS (SI)(AX*4), X2 // X2 = x[i:i+4]
MOVUPS 16(SI)(AX*4), X3
MOVUPS 32(SI)(AX*4), X4
MOVUPS 48(SI)(AX*4), X5
MULPS X0, X2 // X2 *= a
MULPS X1, X3
MULPS X0, X4
MULPS X1, X5
ADDPS (DI)(AX*4), X2 // X2 += y[i:i+4]
ADDPS 16(DI)(AX*4), X3
ADDPS 32(DI)(AX*4), X4
ADDPS 48(DI)(AX*4), X5
MOVUPS X2, (DI)(AX*4) // dst[i:i+4] = X2
MOVUPS X3, 16(DI)(AX*4)
MOVUPS X4, 32(DI)(AX*4)
MOVUPS X5, 48(DI)(AX*4)
ADDQ $16, AX // i += 16
LOOP axpy_loop // while (--CX) > 0
CMPQ BX, $0 // if BX == 0 { return }
JE axpy_end
axpy_tail4_start: // Reset loop counter for 4-wide tail loop
MOVQ BX, CX // CX = floor( BX / 4 )
SHRQ $2, CX
JZ axpy_tail_start // if CX == 0 { goto axpy_tail_start }
axpy_tail4: // Loop unrolled 4x do {
MOVUPS (SI)(AX*4), X2 // X2 = x[i]
MULPS X0, X2 // X2 *= a
ADDPS (DI)(AX*4), X2 // X2 += y[i]
MOVUPS X2, (DI)(AX*4) // y[i] = X2
ADDQ $4, AX // i += 4
LOOP axpy_tail4 // } while --CX > 0
axpy_tail_start: // Reset loop counter for 1-wide tail loop
MOVQ BX, CX // CX = BX % 4
ANDQ $3, CX
JZ axpy_end // if CX == 0 { return }
axpy_tail:
MOVSS (SI)(AX*4), X1 // X1 = x[i]
MULSS X0, X1 // X1 *= a
ADDSS (DI)(AX*4), X1 // X1 += y[i]
MOVSS X1, (DI)(AX*4) // y[i] = X1
INCQ AX // i++
LOOP axpy_tail // } while --CX > 0
axpy_end:
RET
@@ -0,0 +1,98 @@
// Copyright ©2016 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//+build !noasm,!appengine,!safe
#include "textflag.h"
// func AxpyUnitaryTo(dst []float32, alpha float32, x, y []float32)
TEXT ·AxpyUnitaryTo(SB), NOSPLIT, $0
MOVQ dst_base+0(FP), DI // DI = &dst
MOVQ x_base+32(FP), SI // SI = &x
MOVQ y_base+56(FP), DX // DX = &y
MOVQ x_len+40(FP), BX // BX = min( len(x), len(y), len(dst) )
CMPQ y_len+64(FP), BX
CMOVQLE y_len+64(FP), BX
CMPQ dst_len+8(FP), BX
CMOVQLE dst_len+8(FP), BX
CMPQ BX, $0 // if BX == 0 { return }
JE axpy_end
MOVSS alpha+24(FP), X0
SHUFPS $0, X0, X0 // X0 = { a, a, a, a, }
XORQ AX, AX // i = 0
MOVQ DX, CX
ANDQ $0xF, CX // Align on 16-byte boundary for ADDPS
JZ axpy_no_trim // if CX == 0 { goto axpy_no_trim }
XORQ $0xF, CX // CX = 4 - floor ( B % 16 / 4 )
INCQ CX
SHRQ $2, CX
axpy_align: // Trim first value(s) in unaligned buffer do {
MOVSS (SI)(AX*4), X2 // X2 = x[i]
MULSS X0, X2 // X2 *= a
ADDSS (DX)(AX*4), X2 // X2 += y[i]
MOVSS X2, (DI)(AX*4) // y[i] = X2
INCQ AX // i++
DECQ BX
JZ axpy_end // if --BX == 0 { return }
LOOP axpy_align // } while --CX > 0
axpy_no_trim:
MOVUPS X0, X1 // Copy X0 to X1 for pipelining
MOVQ BX, CX
ANDQ $0xF, BX // BX = len % 16
SHRQ $4, CX // CX = floor( len / 16 )
JZ axpy_tail4_start // if CX == 0 { return }
axpy_loop: // Loop unrolled 16x do {
MOVUPS (SI)(AX*4), X2 // X2 = x[i:i+4]
MOVUPS 16(SI)(AX*4), X3
MOVUPS 32(SI)(AX*4), X4
MOVUPS 48(SI)(AX*4), X5
MULPS X0, X2 // X2 *= a
MULPS X1, X3
MULPS X0, X4
MULPS X1, X5
ADDPS (DX)(AX*4), X2 // X2 += y[i:i+4]
ADDPS 16(DX)(AX*4), X3
ADDPS 32(DX)(AX*4), X4
ADDPS 48(DX)(AX*4), X5
MOVUPS X2, (DI)(AX*4) // dst[i:i+4] = X2
MOVUPS X3, 16(DI)(AX*4)
MOVUPS X4, 32(DI)(AX*4)
MOVUPS X5, 48(DI)(AX*4)
ADDQ $16, AX // i += 16
LOOP axpy_loop // while (--CX) > 0
CMPQ BX, $0 // if BX == 0 { return }
JE axpy_end
axpy_tail4_start: // Reset loop counter for 4-wide tail loop
MOVQ BX, CX // CX = floor( BX / 4 )
SHRQ $2, CX
JZ axpy_tail_start // if CX == 0 { goto axpy_tail_start }
axpy_tail4: // Loop unrolled 4x do {
MOVUPS (SI)(AX*4), X2 // X2 = x[i]
MULPS X0, X2 // X2 *= a
ADDPS (DX)(AX*4), X2 // X2 += y[i]
MOVUPS X2, (DI)(AX*4) // y[i] = X2
ADDQ $4, AX // i += 4
LOOP axpy_tail4 // } while --CX > 0
axpy_tail_start: // Reset loop counter for 1-wide tail loop
MOVQ BX, CX // CX = BX % 4
ANDQ $3, CX
JZ axpy_end // if CX == 0 { return }
axpy_tail:
MOVSS (SI)(AX*4), X1 // X1 = x[i]
MULSS X0, X1 // X1 *= a
ADDSS (DX)(AX*4), X1 // X1 += y[i]
MOVSS X1, (DI)(AX*4) // y[i] = X1
INCQ AX // i++
LOOP axpy_tail // } while --CX > 0
axpy_end:
RET
@@ -0,0 +1,91 @@
// Copyright ©2017 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//+build !noasm,!appengine,!safe
#include "textflag.h"
#define X_PTR SI
#define Y_PTR DI
#define LEN CX
#define TAIL BX
#define INC_X R8
#define INCx3_X R10
#define INC_Y R9
#define INCx3_Y R11
#define SUM X0
#define P_SUM X1
// func DdotInc(x, y []float32, n, incX, incY, ix, iy uintptr) (sum float64)
TEXT ·DdotInc(SB), NOSPLIT, $0
MOVQ x_base+0(FP), X_PTR // X_PTR = &x
MOVQ y_base+24(FP), Y_PTR // Y_PTR = &y
MOVQ n+48(FP), LEN // LEN = n
PXOR SUM, SUM // SUM = 0
CMPQ LEN, $0
JE dot_end
MOVQ ix+72(FP), INC_X // INC_X = ix
MOVQ iy+80(FP), INC_Y // INC_Y = iy
LEAQ (X_PTR)(INC_X*4), X_PTR // X_PTR = &(x[ix])
LEAQ (Y_PTR)(INC_Y*4), Y_PTR // Y_PTR = &(y[iy])
MOVQ incX+56(FP), INC_X // INC_X = incX * sizeof(float32)
SHLQ $2, INC_X
MOVQ incY+64(FP), INC_Y // INC_Y = incY * sizeof(float32)
SHLQ $2, INC_Y
MOVQ LEN, TAIL
ANDQ $3, TAIL // TAIL = LEN % 4
SHRQ $2, LEN // LEN = floor( LEN / 4 )
JZ dot_tail // if LEN == 0 { goto dot_tail }
PXOR P_SUM, P_SUM // P_SUM = 0 for pipelining
LEAQ (INC_X)(INC_X*2), INCx3_X // INCx3_X = INC_X * 3
LEAQ (INC_Y)(INC_Y*2), INCx3_Y // INCx3_Y = INC_Y * 3
dot_loop: // Loop unrolled 4x do {
CVTSS2SD (X_PTR), X2 // X_i = x[i:i+1]
CVTSS2SD (X_PTR)(INC_X*1), X3
CVTSS2SD (X_PTR)(INC_X*2), X4
CVTSS2SD (X_PTR)(INCx3_X*1), X5
CVTSS2SD (Y_PTR), X6 // X_j = y[i:i+1]
CVTSS2SD (Y_PTR)(INC_Y*1), X7
CVTSS2SD (Y_PTR)(INC_Y*2), X8
CVTSS2SD (Y_PTR)(INCx3_Y*1), X9
MULSD X6, X2 // X_i *= X_j
MULSD X7, X3
MULSD X8, X4
MULSD X9, X5
ADDSD X2, SUM // SUM += X_i
ADDSD X3, P_SUM
ADDSD X4, SUM
ADDSD X5, P_SUM
LEAQ (X_PTR)(INC_X*4), X_PTR // X_PTR = &(X_PTR[INC_X * 4])
LEAQ (Y_PTR)(INC_Y*4), Y_PTR // Y_PTR = &(Y_PTR[INC_Y * 4])
DECQ LEN
JNZ dot_loop // } while --LEN > 0
ADDSD P_SUM, SUM // SUM += P_SUM
CMPQ TAIL, $0 // if TAIL == 0 { return }
JE dot_end
dot_tail: // do {
CVTSS2SD (X_PTR), X2 // X2 = x[i]
CVTSS2SD (Y_PTR), X3 // X2 *= y[i]
MULSD X3, X2
ADDSD X2, SUM // SUM += X2
ADDQ INC_X, X_PTR // X_PTR += INC_X
ADDQ INC_Y, Y_PTR // Y_PTR += INC_Y
DECQ TAIL
JNZ dot_tail // } while --TAIL > 0
dot_end:
MOVSD SUM, sum+88(FP) // return SUM
RET
@@ -0,0 +1,110 @@
// Copyright ©2017 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//+build !noasm,!appengine,!safe
#include "textflag.h"
#define HADDPD_SUM_SUM LONG $0xC07C0F66 // @ HADDPD X0, X0
#define X_PTR SI
#define Y_PTR DI
#define LEN CX
#define TAIL BX
#define IDX AX
#define SUM X0
#define P_SUM X1
// func DdotUnitary(x, y []float32) (sum float32)
TEXT ·DdotUnitary(SB), NOSPLIT, $0
MOVQ x_base+0(FP), X_PTR // X_PTR = &x
MOVQ y_base+24(FP), Y_PTR // Y_PTR = &y
MOVQ x_len+8(FP), LEN // LEN = min( len(x), len(y) )
CMPQ y_len+32(FP), LEN
CMOVQLE y_len+32(FP), LEN
PXOR SUM, SUM // psum = 0
CMPQ LEN, $0
JE dot_end
XORQ IDX, IDX
MOVQ Y_PTR, DX
ANDQ $0xF, DX // Align on 16-byte boundary for ADDPS
JZ dot_no_trim // if DX == 0 { goto dot_no_trim }
SUBQ $16, DX
dot_align: // Trim first value(s) in unaligned buffer do {
CVTSS2SD (X_PTR)(IDX*4), X2 // X2 = float64(x[i])
CVTSS2SD (Y_PTR)(IDX*4), X3 // X3 = float64(y[i])
MULSD X3, X2
ADDSD X2, SUM // SUM += X2
INCQ IDX // IDX++
DECQ LEN
JZ dot_end // if --TAIL == 0 { return }
ADDQ $4, DX
JNZ dot_align // } while --LEN > 0
dot_no_trim:
PXOR P_SUM, P_SUM // P_SUM = 0 for pipelining
MOVQ LEN, TAIL
ANDQ $0x7, TAIL // TAIL = LEN % 8
SHRQ $3, LEN // LEN = floor( LEN / 8 )
JZ dot_tail_start // if LEN == 0 { goto dot_tail_start }
dot_loop: // Loop unrolled 8x do {
CVTPS2PD (X_PTR)(IDX*4), X2 // X_i = x[i:i+1]
CVTPS2PD 8(X_PTR)(IDX*4), X3
CVTPS2PD 16(X_PTR)(IDX*4), X4
CVTPS2PD 24(X_PTR)(IDX*4), X5
CVTPS2PD (Y_PTR)(IDX*4), X6 // X_j = y[i:i+1]
CVTPS2PD 8(Y_PTR)(IDX*4), X7
CVTPS2PD 16(Y_PTR)(IDX*4), X8
CVTPS2PD 24(Y_PTR)(IDX*4), X9
MULPD X6, X2 // X_i *= X_j
MULPD X7, X3
MULPD X8, X4
MULPD X9, X5
ADDPD X2, SUM // SUM += X_i
ADDPD X3, P_SUM
ADDPD X4, SUM
ADDPD X5, P_SUM
ADDQ $8, IDX // IDX += 8
DECQ LEN
JNZ dot_loop // } while --LEN > 0
ADDPD P_SUM, SUM // SUM += P_SUM
CMPQ TAIL, $0 // if TAIL == 0 { return }
JE dot_end
dot_tail_start:
MOVQ TAIL, LEN
SHRQ $1, LEN
JZ dot_tail_one
dot_tail_two:
CVTPS2PD (X_PTR)(IDX*4), X2 // X_i = x[i:i+1]
CVTPS2PD (Y_PTR)(IDX*4), X6 // X_j = y[i:i+1]
MULPD X6, X2 // X_i *= X_j
ADDPD X2, SUM // SUM += X_i
ADDQ $2, IDX // IDX += 2
DECQ LEN
JNZ dot_tail_two // } while --LEN > 0
ANDQ $1, TAIL
JZ dot_end
dot_tail_one:
CVTSS2SD (X_PTR)(IDX*4), X2 // X2 = float64(x[i])
CVTSS2SD (Y_PTR)(IDX*4), X3 // X3 = float64(y[i])
MULSD X3, X2 // X2 *= X3
ADDSD X2, SUM // SUM += X2
dot_end:
HADDPD_SUM_SUM // SUM = \sum{ SUM[i] }
MOVSD SUM, sum+48(FP) // return SUM
RET
@@ -0,0 +1,6 @@
// Copyright ©2017 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package f32 provides float32 vector primitives.
package f32
@@ -0,0 +1,85 @@
// Copyright ©2017 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//+build !noasm,!appengine,!safe
#include "textflag.h"
#define X_PTR SI
#define Y_PTR DI
#define LEN CX
#define TAIL BX
#define INC_X R8
#define INCx3_X R10
#define INC_Y R9
#define INCx3_Y R11
#define SUM X0
#define P_SUM X1
// func DotInc(x, y []float32, n, incX, incY, ix, iy uintptr) (sum float32)
TEXT ·DotInc(SB), NOSPLIT, $0
MOVQ x_base+0(FP), X_PTR // X_PTR = &x
MOVQ y_base+24(FP), Y_PTR // Y_PTR = &y
PXOR SUM, SUM // SUM = 0
MOVQ n+48(FP), LEN // LEN = n
CMPQ LEN, $0
JE dot_end
MOVQ ix+72(FP), INC_X // INC_X = ix
MOVQ iy+80(FP), INC_Y // INC_Y = iy
LEAQ (X_PTR)(INC_X*4), X_PTR // X_PTR = &(x[ix])
LEAQ (Y_PTR)(INC_Y*4), Y_PTR // Y_PTR = &(y[iy])
MOVQ incX+56(FP), INC_X // INC_X := incX * sizeof(float32)
SHLQ $2, INC_X
MOVQ incY+64(FP), INC_Y // INC_Y := incY * sizeof(float32)
SHLQ $2, INC_Y
MOVQ LEN, TAIL
ANDQ $0x3, TAIL // TAIL = LEN % 4
SHRQ $2, LEN // LEN = floor( LEN / 4 )
JZ dot_tail // if LEN == 0 { goto dot_tail }
PXOR P_SUM, P_SUM // P_SUM = 0 for pipelining
LEAQ (INC_X)(INC_X*2), INCx3_X // INCx3_X = INC_X * 3
LEAQ (INC_Y)(INC_Y*2), INCx3_Y // INCx3_Y = INC_Y * 3
dot_loop: // Loop unrolled 4x do {
MOVSS (X_PTR), X2 // X_i = x[i:i+1]
MOVSS (X_PTR)(INC_X*1), X3
MOVSS (X_PTR)(INC_X*2), X4
MOVSS (X_PTR)(INCx3_X*1), X5
MULSS (Y_PTR), X2 // X_i *= y[i:i+1]
MULSS (Y_PTR)(INC_Y*1), X3
MULSS (Y_PTR)(INC_Y*2), X4
MULSS (Y_PTR)(INCx3_Y*1), X5
ADDSS X2, SUM // SUM += X_i
ADDSS X3, P_SUM
ADDSS X4, SUM
ADDSS X5, P_SUM
LEAQ (X_PTR)(INC_X*4), X_PTR // X_PTR = &(X_PTR[INC_X * 4])
LEAQ (Y_PTR)(INC_Y*4), Y_PTR // Y_PTR = &(Y_PTR[INC_Y * 4])
DECQ LEN
JNZ dot_loop // } while --LEN > 0
ADDSS P_SUM, SUM // P_SUM += SUM
CMPQ TAIL, $0 // if TAIL == 0 { return }
JE dot_end
dot_tail: // do {
MOVSS (X_PTR), X2 // X2 = x[i]
MULSS (Y_PTR), X2 // X2 *= y[i]
ADDSS X2, SUM // SUM += X2
ADDQ INC_X, X_PTR // X_PTR += INC_X
ADDQ INC_Y, Y_PTR // Y_PTR += INC_Y
DECQ TAIL
JNZ dot_tail // } while --TAIL > 0
dot_end:
MOVSS SUM, sum+88(FP) // return SUM
RET
@@ -0,0 +1,106 @@
// Copyright ©2017 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//+build !noasm,!appengine,!safe
#include "textflag.h"
#define HADDPS_SUM_SUM LONG $0xC07C0FF2 // @ HADDPS X0, X0
#define X_PTR SI
#define Y_PTR DI
#define LEN CX
#define TAIL BX
#define IDX AX
#define SUM X0
#define P_SUM X1
// func DotUnitary(x, y []float32) (sum float32)
TEXT ·DotUnitary(SB), NOSPLIT, $0
MOVQ x_base+0(FP), X_PTR // X_PTR = &x
MOVQ y_base+24(FP), Y_PTR // Y_PTR = &y
PXOR SUM, SUM // SUM = 0
MOVQ x_len+8(FP), LEN // LEN = min( len(x), len(y) )
CMPQ y_len+32(FP), LEN
CMOVQLE y_len+32(FP), LEN
CMPQ LEN, $0
JE dot_end
XORQ IDX, IDX
MOVQ Y_PTR, DX
ANDQ $0xF, DX // Align on 16-byte boundary for MULPS
JZ dot_no_trim // if DX == 0 { goto dot_no_trim }
SUBQ $16, DX
dot_align: // Trim first value(s) in unaligned buffer do {
MOVSS (X_PTR)(IDX*4), X2 // X2 = x[i]
MULSS (Y_PTR)(IDX*4), X2 // X2 *= y[i]
ADDSS X2, SUM // SUM += X2
INCQ IDX // IDX++
DECQ LEN
JZ dot_end // if --TAIL == 0 { return }
ADDQ $4, DX
JNZ dot_align // } while --DX > 0
dot_no_trim:
PXOR P_SUM, P_SUM // P_SUM = 0 for pipelining
MOVQ LEN, TAIL
ANDQ $0xF, TAIL // TAIL = LEN % 16
SHRQ $4, LEN // LEN = floor( LEN / 16 )
JZ dot_tail4_start // if LEN == 0 { goto dot_tail4_start }
dot_loop: // Loop unrolled 16x do {
MOVUPS (X_PTR)(IDX*4), X2 // X_i = x[i:i+1]
MOVUPS 16(X_PTR)(IDX*4), X3
MOVUPS 32(X_PTR)(IDX*4), X4
MOVUPS 48(X_PTR)(IDX*4), X5
MULPS (Y_PTR)(IDX*4), X2 // X_i *= y[i:i+1]
MULPS 16(Y_PTR)(IDX*4), X3
MULPS 32(Y_PTR)(IDX*4), X4
MULPS 48(Y_PTR)(IDX*4), X5
ADDPS X2, SUM // SUM += X_i
ADDPS X3, P_SUM
ADDPS X4, SUM
ADDPS X5, P_SUM
ADDQ $16, IDX // IDX += 16
DECQ LEN
JNZ dot_loop // } while --LEN > 0
ADDPS P_SUM, SUM // SUM += P_SUM
CMPQ TAIL, $0 // if TAIL == 0 { return }
JE dot_end
dot_tail4_start: // Reset loop counter for 4-wide tail loop
MOVQ TAIL, LEN // LEN = floor( TAIL / 4 )
SHRQ $2, LEN
JZ dot_tail_start // if LEN == 0 { goto dot_tail_start }
dot_tail4_loop: // Loop unrolled 4x do {
MOVUPS (X_PTR)(IDX*4), X2 // X_i = x[i:i+1]
MULPS (Y_PTR)(IDX*4), X2 // X_i *= y[i:i+1]
ADDPS X2, SUM // SUM += X_i
ADDQ $4, IDX // i += 4
DECQ LEN
JNZ dot_tail4_loop // } while --LEN > 0
dot_tail_start: // Reset loop counter for 1-wide tail loop
ANDQ $3, TAIL // TAIL = TAIL % 4
JZ dot_end // if TAIL == 0 { return }
dot_tail: // do {
MOVSS (X_PTR)(IDX*4), X2 // X2 = x[i]
MULSS (Y_PTR)(IDX*4), X2 // X2 *= y[i]
ADDSS X2, SUM // psum += X2
INCQ IDX // IDX++
DECQ TAIL
JNZ dot_tail // } while --TAIL > 0
dot_end:
HADDPS_SUM_SUM // SUM = \sum{ SUM[i] }
HADDPS_SUM_SUM
MOVSS SUM, sum+48(FP) // return SUM
RET
@@ -0,0 +1,15 @@
// Copyright ©2017 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build !noasm,!appengine,!safe
package f32
// Ger performs the rank-one operation
// A += alpha * x * y^T
// where A is an m×n dense matrix, x and y are vectors, and alpha is a scalar.
func Ger(m, n uintptr, alpha float32,
x []float32, incX uintptr,
y []float32, incY uintptr,
a []float32, lda uintptr)
@@ -0,0 +1,757 @@
// Copyright ©2017 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//+build !noasm,!appengine,!safe
#include "textflag.h"
#define SIZE 4
#define BITSIZE 2
#define KERNELSIZE 3
#define M_DIM m+0(FP)
#define M CX
#define N_DIM n+8(FP)
#define N BX
#define TMP1 R14
#define TMP2 R15
#define X_PTR SI
#define Y y_base+56(FP)
#define Y_PTR DX
#define A_ROW AX
#define A_PTR DI
#define INC_X R8
#define INC3_X R9
#define INC_Y R10
#define INC3_Y R11
#define LDA R12
#define LDA3 R13
#define ALPHA X0
#define ALPHA_SPILL al-16(SP)
#define LOAD_ALPHA \
MOVSS alpha+16(FP), ALPHA \
SHUFPS $0, ALPHA, ALPHA
#define LOAD_SCALED4 \
PREFETCHNTA 16*SIZE(X_PTR) \
MOVDDUP (X_PTR), X1 \
MOVDDUP 2*SIZE(X_PTR), X3 \
MOVSHDUP X1, X2 \
MOVSHDUP X3, X4 \
MOVSLDUP X1, X1 \
MOVSLDUP X3, X3 \
MULPS ALPHA, X1 \
MULPS ALPHA, X2 \
MULPS ALPHA, X3 \
MULPS ALPHA, X4
#define LOAD_SCALED2 \
MOVDDUP (X_PTR), X1 \
MOVSHDUP X1, X2 \
MOVSLDUP X1, X1 \
MULPS ALPHA, X1 \
MULPS ALPHA, X2
#define LOAD_SCALED1 \
MOVSS (X_PTR), X1 \
SHUFPS $0, X1, X1 \
MULPS ALPHA, X1
#define LOAD_SCALED4_INC \
PREFETCHNTA (X_PTR)(INC_X*8) \
MOVSS (X_PTR), X1 \
MOVSS (X_PTR)(INC_X*1), X2 \
MOVSS (X_PTR)(INC_X*2), X3 \
MOVSS (X_PTR)(INC3_X*1), X4 \
SHUFPS $0, X1, X1 \
SHUFPS $0, X2, X2 \
SHUFPS $0, X3, X3 \
SHUFPS $0, X4, X4 \
MULPS ALPHA, X1 \
MULPS ALPHA, X2 \
MULPS ALPHA, X3 \
MULPS ALPHA, X4
#define LOAD_SCALED2_INC \
MOVSS (X_PTR), X1 \
MOVSS (X_PTR)(INC_X*1), X2 \
SHUFPS $0, X1, X1 \
SHUFPS $0, X2, X2 \
MULPS ALPHA, X1 \
MULPS ALPHA, X2
#define KERNEL_LOAD8 \
MOVUPS (Y_PTR), X5 \
MOVUPS 4*SIZE(Y_PTR), X6
#define KERNEL_LOAD8_INC \
MOVSS (Y_PTR), X5 \
MOVSS (Y_PTR)(INC_Y*1), X6 \
MOVSS (Y_PTR)(INC_Y*2), X7 \
MOVSS (Y_PTR)(INC3_Y*1), X8 \
UNPCKLPS X6, X5 \
UNPCKLPS X8, X7 \
MOVLHPS X7, X5 \
LEAQ (Y_PTR)(INC_Y*4), Y_PTR \
MOVSS (Y_PTR), X6 \
MOVSS (Y_PTR)(INC_Y*1), X7 \
MOVSS (Y_PTR)(INC_Y*2), X8 \
MOVSS (Y_PTR)(INC3_Y*1), X9 \
UNPCKLPS X7, X6 \
UNPCKLPS X9, X8 \
MOVLHPS X8, X6
#define KERNEL_LOAD4 \
MOVUPS (Y_PTR), X5
#define KERNEL_LOAD4_INC \
MOVSS (Y_PTR), X5 \
MOVSS (Y_PTR)(INC_Y*1), X6 \
MOVSS (Y_PTR)(INC_Y*2), X7 \
MOVSS (Y_PTR)(INC3_Y*1), X8 \
UNPCKLPS X6, X5 \
UNPCKLPS X8, X7 \
MOVLHPS X7, X5
#define KERNEL_LOAD2 \
MOVSD (Y_PTR), X5
#define KERNEL_LOAD2_INC \
MOVSS (Y_PTR), X5 \
MOVSS (Y_PTR)(INC_Y*1), X6 \
UNPCKLPS X6, X5
#define KERNEL_4x8 \
MOVUPS X5, X7 \
MOVUPS X6, X8 \
MOVUPS X5, X9 \
MOVUPS X6, X10 \
MOVUPS X5, X11 \
MOVUPS X6, X12 \
MULPS X1, X5 \
MULPS X1, X6 \
MULPS X2, X7 \
MULPS X2, X8 \
MULPS X3, X9 \
MULPS X3, X10 \
MULPS X4, X11 \
MULPS X4, X12
#define STORE_4x8 \
MOVUPS ALPHA, ALPHA_SPILL \
MOVUPS (A_PTR), X13 \
ADDPS X13, X5 \
MOVUPS 4*SIZE(A_PTR), X14 \
ADDPS X14, X6 \
MOVUPS (A_PTR)(LDA*1), X15 \
ADDPS X15, X7 \
MOVUPS 4*SIZE(A_PTR)(LDA*1), X0 \
ADDPS X0, X8 \
MOVUPS (A_PTR)(LDA*2), X13 \
ADDPS X13, X9 \
MOVUPS 4*SIZE(A_PTR)(LDA*2), X14 \
ADDPS X14, X10 \
MOVUPS (A_PTR)(LDA3*1), X15 \
ADDPS X15, X11 \
MOVUPS 4*SIZE(A_PTR)(LDA3*1), X0 \
ADDPS X0, X12 \
MOVUPS X5, (A_PTR) \
MOVUPS X6, 4*SIZE(A_PTR) \
MOVUPS X7, (A_PTR)(LDA*1) \
MOVUPS X8, 4*SIZE(A_PTR)(LDA*1) \
MOVUPS X9, (A_PTR)(LDA*2) \
MOVUPS X10, 4*SIZE(A_PTR)(LDA*2) \
MOVUPS X11, (A_PTR)(LDA3*1) \
MOVUPS X12, 4*SIZE(A_PTR)(LDA3*1) \
MOVUPS ALPHA_SPILL, ALPHA \
ADDQ $8*SIZE, A_PTR
#define KERNEL_4x4 \
MOVUPS X5, X6 \
MOVUPS X5, X7 \
MOVUPS X5, X8 \
MULPS X1, X5 \
MULPS X2, X6 \
MULPS X3, X7 \
MULPS X4, X8
#define STORE_4x4 \
MOVUPS (A_PTR), X13 \
ADDPS X13, X5 \
MOVUPS (A_PTR)(LDA*1), X14 \
ADDPS X14, X6 \
MOVUPS (A_PTR)(LDA*2), X15 \
ADDPS X15, X7 \
MOVUPS (A_PTR)(LDA3*1), X13 \
ADDPS X13, X8 \
MOVUPS X5, (A_PTR) \
MOVUPS X6, (A_PTR)(LDA*1) \
MOVUPS X7, (A_PTR)(LDA*2) \
MOVUPS X8, (A_PTR)(LDA3*1) \
ADDQ $4*SIZE, A_PTR
#define KERNEL_4x2 \
MOVUPS X5, X6 \
MOVUPS X5, X7 \
MOVUPS X5, X8 \
MULPS X1, X5 \
MULPS X2, X6 \
MULPS X3, X7 \
MULPS X4, X8
#define STORE_4x2 \
MOVSD (A_PTR), X9 \
ADDPS X9, X5 \
MOVSD (A_PTR)(LDA*1), X10 \
ADDPS X10, X6 \
MOVSD (A_PTR)(LDA*2), X11 \
ADDPS X11, X7 \
MOVSD (A_PTR)(LDA3*1), X12 \
ADDPS X12, X8 \
MOVSD X5, (A_PTR) \
MOVSD X6, (A_PTR)(LDA*1) \
MOVSD X7, (A_PTR)(LDA*2) \
MOVSD X8, (A_PTR)(LDA3*1) \
ADDQ $2*SIZE, A_PTR
#define KERNEL_4x1 \
MOVSS (Y_PTR), X5 \
MOVSS X5, X6 \
MOVSS X5, X7 \
MOVSS X5, X8 \
MULSS X1, X5 \
MULSS X2, X6 \
MULSS X3, X7 \
MULSS X4, X8
#define STORE_4x1 \
ADDSS (A_PTR), X5 \
ADDSS (A_PTR)(LDA*1), X6 \
ADDSS (A_PTR)(LDA*2), X7 \
ADDSS (A_PTR)(LDA3*1), X8 \
MOVSS X5, (A_PTR) \
MOVSS X6, (A_PTR)(LDA*1) \
MOVSS X7, (A_PTR)(LDA*2) \
MOVSS X8, (A_PTR)(LDA3*1) \
ADDQ $SIZE, A_PTR
#define KERNEL_2x8 \
MOVUPS X5, X7 \
MOVUPS X6, X8 \
MULPS X1, X5 \
MULPS X1, X6 \
MULPS X2, X7 \
MULPS X2, X8
#define STORE_2x8 \
MOVUPS (A_PTR), X9 \
ADDPS X9, X5 \
MOVUPS 4*SIZE(A_PTR), X10 \
ADDPS X10, X6 \
MOVUPS (A_PTR)(LDA*1), X11 \
ADDPS X11, X7 \
MOVUPS 4*SIZE(A_PTR)(LDA*1), X12 \
ADDPS X12, X8 \
MOVUPS X5, (A_PTR) \
MOVUPS X6, 4*SIZE(A_PTR) \
MOVUPS X7, (A_PTR)(LDA*1) \
MOVUPS X8, 4*SIZE(A_PTR)(LDA*1) \
ADDQ $8*SIZE, A_PTR
#define KERNEL_2x4 \
MOVUPS X5, X6 \
MULPS X1, X5 \
MULPS X2, X6
#define STORE_2x4 \
MOVUPS (A_PTR), X9 \
ADDPS X9, X5 \
MOVUPS (A_PTR)(LDA*1), X11 \
ADDPS X11, X6 \
MOVUPS X5, (A_PTR) \
MOVUPS X6, (A_PTR)(LDA*1) \
ADDQ $4*SIZE, A_PTR
#define KERNEL_2x2 \
MOVSD X5, X6 \
MULPS X1, X5 \
MULPS X2, X6
#define STORE_2x2 \
MOVSD (A_PTR), X7 \
ADDPS X7, X5 \
MOVSD (A_PTR)(LDA*1), X8 \
ADDPS X8, X6 \
MOVSD X5, (A_PTR) \
MOVSD X6, (A_PTR)(LDA*1) \
ADDQ $2*SIZE, A_PTR
#define KERNEL_2x1 \
MOVSS (Y_PTR), X5 \
MOVSS X5, X6 \
MULSS X1, X5 \
MULSS X2, X6
#define STORE_2x1 \
ADDSS (A_PTR), X5 \
ADDSS (A_PTR)(LDA*1), X6 \
MOVSS X5, (A_PTR) \
MOVSS X6, (A_PTR)(LDA*1) \
ADDQ $SIZE, A_PTR
#define KERNEL_1x8 \
MULPS X1, X5 \
MULPS X1, X6
#define STORE_1x8 \
MOVUPS (A_PTR), X7 \
ADDPS X7, X5 \
MOVUPS 4*SIZE(A_PTR), X8 \
ADDPS X8, X6 \
MOVUPS X5, (A_PTR) \
MOVUPS X6, 4*SIZE(A_PTR) \
ADDQ $8*SIZE, A_PTR
#define KERNEL_1x4 \
MULPS X1, X5 \
MULPS X1, X6
#define STORE_1x4 \
MOVUPS (A_PTR), X7 \
ADDPS X7, X5 \
MOVUPS X5, (A_PTR) \
ADDQ $4*SIZE, A_PTR
#define KERNEL_1x2 \
MULPS X1, X5
#define STORE_1x2 \
MOVSD (A_PTR), X6 \
ADDPS X6, X5 \
MOVSD X5, (A_PTR) \
ADDQ $2*SIZE, A_PTR
#define KERNEL_1x1 \
MOVSS (Y_PTR), X5 \
MULSS X1, X5
#define STORE_1x1 \
ADDSS (A_PTR), X5 \
MOVSS X5, (A_PTR) \
ADDQ $SIZE, A_PTR
// func Ger(m, n uintptr, alpha float32,
// x []float32, incX uintptr,
// y []float32, incY uintptr,
// a []float32, lda uintptr)
TEXT ·Ger(SB), 0, $16-120
MOVQ M_DIM, M
MOVQ N_DIM, N
CMPQ M, $0
JE end
CMPQ N, $0
JE end
LOAD_ALPHA
MOVQ x_base+24(FP), X_PTR
MOVQ y_base+56(FP), Y_PTR
MOVQ a_base+88(FP), A_ROW
MOVQ A_ROW, A_PTR
MOVQ lda+112(FP), LDA // LDA = LDA * sizeof(float32)
SHLQ $BITSIZE, LDA
LEAQ (LDA)(LDA*2), LDA3 // LDA3 = LDA * 3
CMPQ incY+80(FP), $1 // Check for dense vector Y (fast-path)
JNE inc
CMPQ incX+48(FP), $1 // Check for dense vector X (fast-path)
JNE inc
SHRQ $2, M
JZ r2
r4:
// LOAD 4
LOAD_SCALED4
MOVQ N_DIM, N
SHRQ $KERNELSIZE, N
JZ r4c4
r4c8:
// 4x8 KERNEL
KERNEL_LOAD8
KERNEL_4x8
STORE_4x8
ADDQ $8*SIZE, Y_PTR
DECQ N
JNZ r4c8
r4c4:
TESTQ $4, N_DIM
JZ r4c2
// 4x4 KERNEL
KERNEL_LOAD4
KERNEL_4x4
STORE_4x4
ADDQ $4*SIZE, Y_PTR
r4c2:
TESTQ $2, N_DIM
JZ r4c1
// 4x2 KERNEL
KERNEL_LOAD2
KERNEL_4x2
STORE_4x2
ADDQ $2*SIZE, Y_PTR
r4c1:
TESTQ $1, N_DIM
JZ r4end
// 4x1 KERNEL
KERNEL_4x1
STORE_4x1
ADDQ $SIZE, Y_PTR
r4end:
ADDQ $4*SIZE, X_PTR
MOVQ Y, Y_PTR
LEAQ (A_ROW)(LDA*4), A_ROW
MOVQ A_ROW, A_PTR
DECQ M
JNZ r4
r2:
TESTQ $2, M_DIM
JZ r1
// LOAD 2
LOAD_SCALED2
MOVQ N_DIM, N
SHRQ $KERNELSIZE, N
JZ r2c4
r2c8:
// 2x8 KERNEL
KERNEL_LOAD8
KERNEL_2x8
STORE_2x8
ADDQ $8*SIZE, Y_PTR
DECQ N
JNZ r2c8
r2c4:
TESTQ $4, N_DIM
JZ r2c2
// 2x4 KERNEL
KERNEL_LOAD4
KERNEL_2x4
STORE_2x4
ADDQ $4*SIZE, Y_PTR
r2c2:
TESTQ $2, N_DIM
JZ r2c1
// 2x2 KERNEL
KERNEL_LOAD2
KERNEL_2x2
STORE_2x2
ADDQ $2*SIZE, Y_PTR
r2c1:
TESTQ $1, N_DIM
JZ r2end
// 2x1 KERNEL
KERNEL_2x1
STORE_2x1
ADDQ $SIZE, Y_PTR
r2end:
ADDQ $2*SIZE, X_PTR
MOVQ Y, Y_PTR
LEAQ (A_ROW)(LDA*2), A_ROW
MOVQ A_ROW, A_PTR
r1:
TESTQ $1, M_DIM
JZ end
// LOAD 1
LOAD_SCALED1
MOVQ N_DIM, N
SHRQ $KERNELSIZE, N
JZ r1c4
r1c8:
// 1x8 KERNEL
KERNEL_LOAD8
KERNEL_1x8
STORE_1x8
ADDQ $8*SIZE, Y_PTR
DECQ N
JNZ r1c8
r1c4:
TESTQ $4, N_DIM
JZ r1c2
// 1x4 KERNEL
KERNEL_LOAD4
KERNEL_1x4
STORE_1x4
ADDQ $4*SIZE, Y_PTR
r1c2:
TESTQ $2, N_DIM
JZ r1c1
// 1x2 KERNEL
KERNEL_LOAD2
KERNEL_1x2
STORE_1x2
ADDQ $2*SIZE, Y_PTR
r1c1:
TESTQ $1, N_DIM
JZ end
// 1x1 KERNEL
KERNEL_1x1
STORE_1x1
end:
RET
inc: // Algorithm for incY != 0 ( split loads in kernel )
MOVQ incX+48(FP), INC_X // INC_X = incX * sizeof(float32)
SHLQ $BITSIZE, INC_X
MOVQ incY+80(FP), INC_Y // INC_Y = incY * sizeof(float32)
SHLQ $BITSIZE, INC_Y
LEAQ (INC_X)(INC_X*2), INC3_X // INC3_X = INC_X * 3
LEAQ (INC_Y)(INC_Y*2), INC3_Y // INC3_Y = INC_Y * 3
XORQ TMP2, TMP2
MOVQ M, TMP1
SUBQ $1, TMP1
IMULQ INC_X, TMP1
NEGQ TMP1
CMPQ INC_X, $0
CMOVQLT TMP1, TMP2
LEAQ (X_PTR)(TMP2*SIZE), X_PTR
XORQ TMP2, TMP2
MOVQ N, TMP1
SUBQ $1, TMP1
IMULQ INC_Y, TMP1
NEGQ TMP1
CMPQ INC_Y, $0
CMOVQLT TMP1, TMP2
LEAQ (Y_PTR)(TMP2*SIZE), Y_PTR
SHRQ $2, M
JZ inc_r2
inc_r4:
// LOAD 4
LOAD_SCALED4_INC
MOVQ N_DIM, N
SHRQ $KERNELSIZE, N
JZ inc_r4c4
inc_r4c8:
// 4x4 KERNEL
KERNEL_LOAD8_INC
KERNEL_4x8
STORE_4x8
LEAQ (Y_PTR)(INC_Y*4), Y_PTR
DECQ N
JNZ inc_r4c8
inc_r4c4:
TESTQ $4, N_DIM
JZ inc_r4c2
// 4x4 KERNEL
KERNEL_LOAD4_INC
KERNEL_4x4
STORE_4x4
LEAQ (Y_PTR)(INC_Y*4), Y_PTR
inc_r4c2:
TESTQ $2, N_DIM
JZ inc_r4c1
// 4x2 KERNEL
KERNEL_LOAD2_INC
KERNEL_4x2
STORE_4x2
LEAQ (Y_PTR)(INC_Y*2), Y_PTR
inc_r4c1:
TESTQ $1, N_DIM
JZ inc_r4end
// 4x1 KERNEL
KERNEL_4x1
STORE_4x1
ADDQ INC_Y, Y_PTR
inc_r4end:
LEAQ (X_PTR)(INC_X*4), X_PTR
MOVQ Y, Y_PTR
LEAQ (A_ROW)(LDA*4), A_ROW
MOVQ A_ROW, A_PTR
DECQ M
JNZ inc_r4
inc_r2:
TESTQ $2, M_DIM
JZ inc_r1
// LOAD 2
LOAD_SCALED2_INC
MOVQ N_DIM, N
SHRQ $KERNELSIZE, N
JZ inc_r2c4
inc_r2c8:
// 2x8 KERNEL
KERNEL_LOAD8_INC
KERNEL_2x8
STORE_2x8
LEAQ (Y_PTR)(INC_Y*4), Y_PTR
DECQ N
JNZ inc_r2c8
inc_r2c4:
TESTQ $4, N_DIM
JZ inc_r2c2
// 2x4 KERNEL
KERNEL_LOAD4_INC
KERNEL_2x4
STORE_2x4
LEAQ (Y_PTR)(INC_Y*4), Y_PTR
inc_r2c2:
TESTQ $2, N_DIM
JZ inc_r2c1
// 2x2 KERNEL
KERNEL_LOAD2_INC
KERNEL_2x2
STORE_2x2
LEAQ (Y_PTR)(INC_Y*2), Y_PTR
inc_r2c1:
TESTQ $1, N_DIM
JZ inc_r2end
// 2x1 KERNEL
KERNEL_2x1
STORE_2x1
ADDQ INC_Y, Y_PTR
inc_r2end:
LEAQ (X_PTR)(INC_X*2), X_PTR
MOVQ Y, Y_PTR
LEAQ (A_ROW)(LDA*2), A_ROW
MOVQ A_ROW, A_PTR
inc_r1:
TESTQ $1, M_DIM
JZ end
// LOAD 1
LOAD_SCALED1
MOVQ N_DIM, N
SHRQ $KERNELSIZE, N
JZ inc_r1c4
inc_r1c8:
// 1x8 KERNEL
KERNEL_LOAD8_INC
KERNEL_1x8
STORE_1x8
LEAQ (Y_PTR)(INC_Y*4), Y_PTR
DECQ N
JNZ inc_r1c8
inc_r1c4:
TESTQ $4, N_DIM
JZ inc_r1c2
// 1x4 KERNEL
KERNEL_LOAD4_INC
KERNEL_1x4
STORE_1x4
LEAQ (Y_PTR)(INC_Y*4), Y_PTR
inc_r1c2:
TESTQ $2, N_DIM
JZ inc_r1c1
// 1x2 KERNEL
KERNEL_LOAD2_INC
KERNEL_1x2
STORE_1x2
LEAQ (Y_PTR)(INC_Y*2), Y_PTR
inc_r1c1:
TESTQ $1, N_DIM
JZ inc_end
// 1x1 KERNEL
KERNEL_1x1
STORE_1x1
inc_end:
RET
@@ -0,0 +1,36 @@
// Copyright ©2017 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build !amd64 noasm appengine safe
package f32
// Ger performs the rank-one operation
// A += alpha * x * y^T
// where A is an m×n dense matrix, x and y are vectors, and alpha is a scalar.
func Ger(m, n uintptr, alpha float32, x []float32, incX uintptr, y []float32, incY uintptr, a []float32, lda uintptr) {
if incX == 1 && incY == 1 {
x = x[:m]
y = y[:n]
for i, xv := range x {
AxpyUnitary(alpha*xv, y, a[uintptr(i)*lda:uintptr(i)*lda+n])
}
return
}
var ky, kx uintptr
if int(incY) < 0 {
ky = uintptr(-int(n-1) * int(incY))
}
if int(incX) < 0 {
kx = uintptr(-int(m-1) * int(incX))
}
ix := kx
for i := 0; i < int(m); i++ {
AxpyInc(alpha*x[ix], y, a[uintptr(i)*lda:uintptr(i)*lda+n], uintptr(n), uintptr(incY), 1, uintptr(ky), 0)
ix += incX
}
}
@@ -0,0 +1,55 @@
// Copyright ©2016 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package f32
// ScalUnitary is
// for i := range x {
// x[i] *= alpha
// }
func ScalUnitary(alpha float32, x []float32) {
for i := range x {
x[i] *= alpha
}
}
// ScalUnitaryTo is
// for i, v := range x {
// dst[i] = alpha * v
// }
func ScalUnitaryTo(dst []float32, alpha float32, x []float32) {
for i, v := range x {
dst[i] = alpha * v
}
}
// ScalInc is
// var ix uintptr
// for i := 0; i < int(n); i++ {
// x[ix] *= alpha
// ix += incX
// }
func ScalInc(alpha float32, x []float32, n, incX uintptr) {
var ix uintptr
for i := 0; i < int(n); i++ {
x[ix] *= alpha
ix += incX
}
}
// ScalIncTo is
// var idst, ix uintptr
// for i := 0; i < int(n); i++ {
// dst[idst] = alpha * x[ix]
// ix += incX
// idst += incDst
// }
func ScalIncTo(dst []float32, incDst uintptr, alpha float32, x []float32, n, incX uintptr) {
var idst, ix uintptr
for i := 0; i < int(n); i++ {
dst[idst] = alpha * x[ix]
ix += incX
idst += incDst
}
}
@@ -0,0 +1,68 @@
// Copyright ©2016 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build !noasm,!appengine,!safe
package f32
// AxpyUnitary is
// for i, v := range x {
// y[i] += alpha * v
// }
func AxpyUnitary(alpha float32, x, y []float32)
// AxpyUnitaryTo is
// for i, v := range x {
// dst[i] = alpha*v + y[i]
// }
func AxpyUnitaryTo(dst []float32, alpha float32, x, y []float32)
// AxpyInc is
// for i := 0; i < int(n); i++ {
// y[iy] += alpha * x[ix]
// ix += incX
// iy += incY
// }
func AxpyInc(alpha float32, x, y []float32, n, incX, incY, ix, iy uintptr)
// AxpyIncTo is
// for i := 0; i < int(n); i++ {
// dst[idst] = alpha*x[ix] + y[iy]
// ix += incX
// iy += incY
// idst += incDst
// }
func AxpyIncTo(dst []float32, incDst, idst uintptr, alpha float32, x, y []float32, n, incX, incY, ix, iy uintptr)
// DdotUnitary is
// for i, v := range x {
// sum += float64(y[i]) * float64(v)
// }
// return
func DdotUnitary(x, y []float32) (sum float64)
// DdotInc is
// for i := 0; i < int(n); i++ {
// sum += float64(y[iy]) * float64(x[ix])
// ix += incX
// iy += incY
// }
// return
func DdotInc(x, y []float32, n, incX, incY, ix, iy uintptr) (sum float64)
// DotUnitary is
// for i, v := range x {
// sum += y[i] * v
// }
// return sum
func DotUnitary(x, y []float32) (sum float32)
// DotInc is
// for i := 0; i < int(n); i++ {
// sum += y[iy] * x[ix]
// ix += incX
// iy += incY
// }
// return sum
func DotInc(x, y []float32, n, incX, incY, ix, iy uintptr) (sum float32)
@@ -0,0 +1,113 @@
// Copyright ©2016 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build !amd64 noasm appengine safe
package f32
// AxpyUnitary is
// for i, v := range x {
// y[i] += alpha * v
// }
func AxpyUnitary(alpha float32, x, y []float32) {
for i, v := range x {
y[i] += alpha * v
}
}
// AxpyUnitaryTo is
// for i, v := range x {
// dst[i] = alpha*v + y[i]
// }
func AxpyUnitaryTo(dst []float32, alpha float32, x, y []float32) {
for i, v := range x {
dst[i] = alpha*v + y[i]
}
}
// AxpyInc is
// for i := 0; i < int(n); i++ {
// y[iy] += alpha * x[ix]
// ix += incX
// iy += incY
// }
func AxpyInc(alpha float32, x, y []float32, n, incX, incY, ix, iy uintptr) {
for i := 0; i < int(n); i++ {
y[iy] += alpha * x[ix]
ix += incX
iy += incY
}
}
// AxpyIncTo is
// for i := 0; i < int(n); i++ {
// dst[idst] = alpha*x[ix] + y[iy]
// ix += incX
// iy += incY
// idst += incDst
// }
func AxpyIncTo(dst []float32, incDst, idst uintptr, alpha float32, x, y []float32, n, incX, incY, ix, iy uintptr) {
for i := 0; i < int(n); i++ {
dst[idst] = alpha*x[ix] + y[iy]
ix += incX
iy += incY
idst += incDst
}
}
// DotUnitary is
// for i, v := range x {
// sum += y[i] * v
// }
// return sum
func DotUnitary(x, y []float32) (sum float32) {
for i, v := range x {
sum += y[i] * v
}
return sum
}
// DotInc is
// for i := 0; i < int(n); i++ {
// sum += y[iy] * x[ix]
// ix += incX
// iy += incY
// }
// return sum
func DotInc(x, y []float32, n, incX, incY, ix, iy uintptr) (sum float32) {
for i := 0; i < int(n); i++ {
sum += y[iy] * x[ix]
ix += incX
iy += incY
}
return sum
}
// DdotUnitary is
// for i, v := range x {
// sum += float64(y[i]) * float64(v)
// }
// return
func DdotUnitary(x, y []float32) (sum float64) {
for i, v := range x {
sum += float64(y[i]) * float64(v)
}
return
}
// DdotInc is
// for i := 0; i < int(n); i++ {
// sum += float64(y[iy]) * float64(x[ix])
// ix += incX
// iy += incY
// }
// return
func DdotInc(x, y []float32, n, incX, incY, ix, iy uintptr) (sum float64) {
for i := 0; i < int(n); i++ {
sum += float64(y[iy]) * float64(x[ix])
ix += incX
iy += incY
}
return
}

Some files were not shown because too many files have changed in this diff Show More