Initial commit.

llvm-svn: 147756
This commit is contained in:
Peter Collingbourne 2012-01-08 22:09:58 +00:00
parent 11faafe7dc
commit d5395fbf03
68 changed files with 1680 additions and 0 deletions

2
libclc/CREDITS.TXT Normal file
View File

@ -0,0 +1,2 @@
N: Peter Collingbourne
E: peter@pcc.me.uk

29
libclc/LICENSE.TXT Normal file
View File

@ -0,0 +1,29 @@
Copyright (c) 2011 by the contributors listed in CREDITS.TXT
All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal with
the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
of the Software, and to permit persons to whom the Software is furnished to do
so, subject to the following conditions:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimers.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimers in the
documentation and/or other materials provided with the distribution.
* The names of the contributors may not be used to endorse or promote
products derived from this Software without specific prior written
permission.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE
SOFTWARE.

37
libclc/README.TXT Normal file
View File

@ -0,0 +1,37 @@
libclc
------
libclc is an open source, BSD licensed implementation of the library
requirements of the OpenCL C programming language, as specified by the
OpenCL 1.1 Specification. The following sections of the specification
impose library requirements:
* 6.1: Supported Data Types
* 6.2.3: Explicit Conversions
* 6.2.4.2: Reinterpreting Types Using as_type() and as_typen()
* 6.9: Preprocessor Directives and Macros
* 6.11: Built-in Functions
* 9.3: Double Precision Floating-Point
* 9.4: 64-bit Atomics
* 9.5: Writing to 3D image memory objects
* 9.6: Half Precision Floating-Point
libclc is intended to be used with the Clang compiler's OpenCL frontend.
libclc is designed to be portable and extensible. To this end, it provides
generic implementations of most library requirements, allowing the target
to override the generic implementation at the granularity of individual
functions.
libclc currently only supports the PTX target, but support for more
targets is welcome.
Compiling
---------
./configure.py --with-llvm-config=/path/to/llvm-config && make
Website
-------
http://www.pcc.me.uk/~peter/libclc/

91
libclc/build/metabuild.py Normal file
View File

@ -0,0 +1,91 @@
import ninja_syntax
import os
# Simple meta-build system.
class Make(object):
def __init__(self):
self.output = open(self.output_filename(), 'w')
self.rules = {}
self.rule_text = ''
self.all_targets = []
self.clean_files = []
self.distclean_files = []
self.output.write("""all::
ifndef VERBOSE
Verb = @
endif
""")
def output_filename(self):
return 'Makefile'
def rule(self, name, command, description=None, depfile=None,
generator=False):
self.rules[name] = {'command': command, 'description': description,
'depfile': depfile, 'generator': generator}
def build(self, output, rule, inputs=[], implicit=[], order_only=[]):
inputs = self._as_list(inputs)
implicit = self._as_list(implicit)
order_only = self._as_list(order_only)
output_dir = os.path.dirname(output)
if output_dir != '' and not os.path.isdir(output_dir):
os.makedirs(output_dir)
dollar_in = ' '.join(inputs)
subst = lambda text: text.replace('$in', dollar_in).replace('$out', output)
deps = ' '.join(inputs + implicit)
if order_only:
deps += ' | '
deps += ' '.join(order_only)
self.output.write('%s: %s\n' % (output, deps))
r = self.rules[rule]
command = subst(r['command'])
if r['description']:
desc = subst(r['description'])
self.output.write('\t@echo %s\n\t$(Verb) %s\n' % (desc, command))
else:
self.output.write('\t%s\n' % command)
if r['depfile']:
depfile = subst(r['depfile'])
self.output.write('-include '+depfile+'\n')
self.output.write('\n')
self.all_targets.append(output)
if r['generator']:
self.distclean_files.append(output)
else:
self.clean_files.append(output)
def _as_list(self, input):
if isinstance(input, list):
return input
return [input]
def finish(self):
self.output.write('all:: %s\n\n' % ' '.join(self.all_targets))
self.output.write('clean: \n\trm -f %s\n\n' % ' '.join(self.clean_files))
self.output.write('distclean: clean\n\trm -f %s\n' % ' '.join(self.distclean_files))
class Ninja(ninja_syntax.Writer):
def __init__(self):
ninja_syntax.Writer.__init__(self, open(self.output_filename(), 'w'))
def output_filename(self):
return 'build.ninja'
def finish(self):
pass
def from_name(name):
if name == 'make':
return Make()
if name == 'ninja':
return Ninja()
raise LookupError, 'unknown generator: %s; supported generators are make and ninja' % name

View File

@ -0,0 +1,110 @@
#!/usr/bin/python
"""Python module for generating .ninja files.
Note that this is emphatically not a required piece of Ninja; it's
just a helpful utility for build-file-generation systems that already
use Python.
"""
import textwrap
class Writer(object):
def __init__(self, output, width=78):
self.output = output
self.width = width
def newline(self):
self.output.write('\n')
def comment(self, text):
for line in textwrap.wrap(text, self.width - 2):
self.output.write('# ' + line + '\n')
def variable(self, key, value, indent=0):
if value is None:
return
if isinstance(value, list):
value = ' '.join(value)
self._line('%s = %s' % (key, value), indent)
def rule(self, name, command, description=None, depfile=None,
generator=False):
self._line('rule %s' % name)
self.variable('command', command, indent=1)
if description:
self.variable('description', description, indent=1)
if depfile:
self.variable('depfile', depfile, indent=1)
if generator:
self.variable('generator', '1', indent=1)
def build(self, outputs, rule, inputs=None, implicit=None, order_only=None,
variables=None):
outputs = self._as_list(outputs)
all_inputs = self._as_list(inputs)[:]
if implicit:
all_inputs.append('|')
all_inputs.extend(self._as_list(implicit))
if order_only:
all_inputs.append('||')
all_inputs.extend(self._as_list(order_only))
self._line('build %s: %s %s' % (' '.join(outputs),
rule,
' '.join(all_inputs)))
if variables:
for key, val in variables:
self.variable(key, val, indent=1)
return outputs
def include(self, path):
self._line('include %s' % path)
def subninja(self, path):
self._line('subninja %s' % path)
def default(self, paths):
self._line('default %s' % ' '.join(self._as_list(paths)))
def _line(self, text, indent=0):
"""Write 'text' word-wrapped at self.width characters."""
leading_space = ' ' * indent
while len(text) > self.width:
# The text is too wide; wrap if possible.
# Find the rightmost space that would obey our width constraint.
available_space = self.width - len(leading_space) - len(' $')
space = text.rfind(' ', 0, available_space)
if space < 0:
# No such space; just use the first space we can find.
space = text.find(' ', available_space)
if space < 0:
# Give up on breaking.
break
self.output.write(leading_space + text[0:space] + ' $\n')
text = text[space+1:]
# Subsequent lines are continuations, so indent them.
leading_space = ' ' * (indent+2)
self.output.write(leading_space + text + '\n')
def _as_list(self, input):
if input is None:
return []
if isinstance(input, list):
return input
return [input]
def escape(string):
"""Escape a string such that it can be embedded into a Ninja file without
further interpretation."""
assert '\n' not in string, 'Ninja syntax does not allow newlines'
# We only have one special metacharacter: '$'.
return string.replace('$', '$$')

3
libclc/compile-test.sh Executable file
View File

@ -0,0 +1,3 @@
#!/bin/sh
clang -ccc-host-triple ptx32--nvidiacl -Iptx-nvidiacl/include -Igeneric/include -Xclang -mlink-bitcode-file -Xclang ptx32--nvidiacl/lib/builtins.bc -include clc/clc.h -Dcl_clang_storage_class_specifiers "$@"

133
libclc/configure.py Executable file
View File

@ -0,0 +1,133 @@
#!/usr/bin/python
def c_compiler_rule(b, name, description, compiler, flags):
command = "%s -MMD -MF $out.d %s -c -o $out $in" % (compiler, flags)
b.rule(name, command, description + " $out", depfile="$out.d")
from optparse import OptionParser
import os
from subprocess import *
import sys
srcdir = os.path.dirname(sys.argv[0])
sys.path.insert(0, os.path.join(srcdir, 'build'))
import metabuild
p = OptionParser()
p.add_option('--with-llvm-config', metavar='PATH',
help='use given llvm-config script')
p.add_option('-g', metavar='GENERATOR', default='make',
help='use given generator (default: make)')
(options, args) = p.parse_args()
llvm_config_exe = options.with_llvm_config or "llvm-config"
def llvm_config(args):
try:
proc = Popen([llvm_config_exe] + args, stdout=PIPE)
return proc.communicate()[0].rstrip().replace('\n', ' ')
except OSError:
print "Error executing llvm-config."
print "Please ensure that llvm-config is in your $PATH, or use --with-llvm-config."
sys.exit(1)
llvm_bindir = llvm_config(['--bindir'])
llvm_core_libs = llvm_config(['--ldflags', '--libs', 'core', 'bitreader', 'bitwriter'])
llvm_cxxflags = llvm_config(['--cxxflags']) + ' -fno-exceptions -fno-rtti'
llvm_clang = os.path.join(llvm_bindir, 'clang')
llvm_link = os.path.join(llvm_bindir, 'llvm-link')
llvm_opt = os.path.join(llvm_bindir, 'opt')
default_targets = ['ptx32--nvidiacl', 'ptx64--nvidiacl']
targets = args
if not targets:
targets = default_targets
b = metabuild.from_name(options.g)
b.rule("LLVM_AS", "%s -o $out $in" % os.path.join(llvm_bindir, "llvm-as"),
'LLVM-AS $out')
b.rule("LLVM_LINK", command = llvm_link + " -o $out $in",
description = 'LLVM-LINK $out')
b.rule("OPT", command = llvm_opt + " -O3 -o $out $in",
description = 'OPT $out')
c_compiler_rule(b, "LLVM_TOOL_CXX", 'CXX', 'c++', llvm_cxxflags)
b.rule("LLVM_TOOL_LINK", "c++ -o $out $in %s" % llvm_core_libs, 'LINK $out')
prepare_builtins = os.path.join('utils', 'prepare-builtins')
b.build(os.path.join('utils', 'prepare-builtins.o'), "LLVM_TOOL_CXX",
os.path.join(srcdir, 'utils', 'prepare-builtins.cpp'))
b.build(prepare_builtins, "LLVM_TOOL_LINK",
os.path.join('utils', 'prepare-builtins.o'))
b.rule("PREPARE_BUILTINS", "%s -o $out $in" % prepare_builtins,
'PREPARE-BUILTINS $out')
manifest_deps = set([sys.argv[0], os.path.join(srcdir, 'build', 'metabuild.py'),
os.path.join(srcdir, 'build', 'ninja_syntax.py')])
for target in targets:
(t_arch, t_vendor, t_os) = target.split('-')
archs = [t_arch]
if t_arch == 'ptx32' or t_arch == 'ptx64':
archs.append('ptx')
archs.append('generic')
subdirs = []
for arch in archs:
subdirs.append("%s-%s-%s" % (arch, t_vendor, t_os))
subdirs.append("%s-%s" % (arch, t_os))
subdirs.append(arch)
subdirs = [subdir for subdir in subdirs
if os.path.isdir(os.path.join(srcdir, subdir, 'include')) or
os.path.isfile(os.path.join(srcdir, subdir, 'lib', 'SOURCES'))]
clang_cl_includes = ' '.join(["-I%s" % os.path.join(srcdir, subdir, 'include')
for subdir in subdirs])
# The rule for building a .bc file for the specified architecture using clang.
clang_bc_flags = "-ccc-host-triple %s -I`dirname $in` %s " \
"-Dcl_clang_storage_class_specifiers " \
"-emit-llvm" % (target, clang_cl_includes)
clang_bc_rule = "CLANG_CL_BC_" + target
c_compiler_rule(b, clang_bc_rule, "LLVM-CC", llvm_clang, clang_bc_flags)
objects = []
sources_seen = set()
for subdir in subdirs:
src_libdir = os.path.join(srcdir, subdir, 'lib')
if not os.path.isdir(src_libdir):
continue
subdir_list_file = os.path.join(src_libdir, 'SOURCES')
manifest_deps.add(subdir_list_file)
for src in open(subdir_list_file).readlines():
src = src.rstrip()
if src not in sources_seen:
sources_seen.add(src)
obj = os.path.join(target, 'lib', src + '.bc')
objects.append(obj)
src_file = os.path.join(src_libdir, src)
ext = os.path.splitext(src)[1]
if ext == '.ll':
b.build(obj, 'LLVM_AS', src_file)
else:
b.build(obj, clang_bc_rule, src_file)
builtins_link_bc = os.path.join(target, 'lib', 'builtins.link.bc')
builtins_opt_bc = os.path.join(target, 'lib', 'builtins.opt.bc')
builtins_bc = os.path.join(target, 'lib', 'builtins.bc')
b.build(builtins_link_bc, "LLVM_LINK", objects)
b.build(builtins_opt_bc, "OPT", builtins_link_bc)
b.build(builtins_bc, "PREPARE_BUILTINS", builtins_opt_bc, prepare_builtins)
b.rule("configure", command = ' '.join(sys.argv), description = 'CONFIGURE',
generator = True)
b.build(b.output_filename(), 'configure', list(manifest_deps))
b.finish()

View File

@ -0,0 +1,53 @@
#define as_char(x) __builtin_astype(x, char)
#define as_uchar(x) __builtin_astype(x, uchar)
#define as_short(x) __builtin_astype(x, short)
#define as_ushort(x) __builtin_astype(x, ushort)
#define as_int(x) __builtin_astype(x, int)
#define as_uint(x) __builtin_astype(x, uint)
#define as_long(x) __builtin_astype(x, long)
#define as_ulong(x) __builtin_astype(x, ulong)
#define as_char2(x) __builtin_astype(x, char2)
#define as_uchar2(x) __builtin_astype(x, uchar2)
#define as_short2(x) __builtin_astype(x, short2)
#define as_ushort2(x) __builtin_astype(x, ushort2)
#define as_int2(x) __builtin_astype(x, int2)
#define as_uint2(x) __builtin_astype(x, uint2)
#define as_long2(x) __builtin_astype(x, long2)
#define as_ulong2(x) __builtin_astype(x, ulong2)
#define as_char3(x) __builtin_astype(x, char3)
#define as_uchar3(x) __builtin_astype(x, uchar3)
#define as_short3(x) __builtin_astype(x, short3)
#define as_ushort3(x) __builtin_astype(x, ushort3)
#define as_int3(x) __builtin_astype(x, int3)
#define as_uint3(x) __builtin_astype(x, uint3)
#define as_long3(x) __builtin_astype(x, long3)
#define as_ulong3(x) __builtin_astype(x, ulong3)
#define as_char4(x) __builtin_astype(x, char4)
#define as_uchar4(x) __builtin_astype(x, uchar4)
#define as_short4(x) __builtin_astype(x, short4)
#define as_ushort4(x) __builtin_astype(x, ushort4)
#define as_int4(x) __builtin_astype(x, int4)
#define as_uint4(x) __builtin_astype(x, uint4)
#define as_long4(x) __builtin_astype(x, long4)
#define as_ulong4(x) __builtin_astype(x, ulong4)
#define as_char8(x) __builtin_astype(x, char8)
#define as_uchar8(x) __builtin_astype(x, uchar8)
#define as_short8(x) __builtin_astype(x, short8)
#define as_ushort8(x) __builtin_astype(x, ushort8)
#define as_int8(x) __builtin_astype(x, int8)
#define as_uint8(x) __builtin_astype(x, uint8)
#define as_long8(x) __builtin_astype(x, long8)
#define as_ulong8(x) __builtin_astype(x, ulong8)
#define as_char16(x) __builtin_astype(x, char16)
#define as_uchar16(x) __builtin_astype(x, uchar16)
#define as_short16(x) __builtin_astype(x, short16)
#define as_ushort16(x) __builtin_astype(x, ushort16)
#define as_int16(x) __builtin_astype(x, int16)
#define as_uint16(x) __builtin_astype(x, uint16)
#define as_long16(x) __builtin_astype(x, long16)
#define as_ulong16(x) __builtin_astype(x, ulong16)

View File

@ -0,0 +1,57 @@
#ifndef cl_clang_storage_class_specifiers
#error Implementation requires cl_clang_storage_class_specifiers extension!
#endif
#pragma OPENCL EXTENSION cl_clang_storage_class_specifiers : enable
#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
#endif
/* Function Attributes */
#include <clc/clcfunc.h>
/* Pattern Macro Definitions */
#include <clc/clcmacro.h>
/* 6.1 Supported Data Types */
#include <clc/clctypes.h>
/* 6.2.4.2 Reinterpreting Types Using as_type() and as_typen() */
#include <clc/as_type.h>
/* 6.11.1 Work-Item Functions */
#include <clc/workitem/get_global_size.h>
#include <clc/workitem/get_global_id.h>
#include <clc/workitem/get_local_size.h>
#include <clc/workitem/get_local_id.h>
#include <clc/workitem/get_num_groups.h>
#include <clc/workitem/get_group_id.h>
/* 6.11.2 Math Functions */
#include <clc/math/cos.h>
#include <clc/math/sin.h>
#include <clc/math/sqrt.h>
#include <clc/math/native_cos.h>
#include <clc/math/native_divide.h>
#include <clc/math/native_sin.h>
#include <clc/math/native_sqrt.h>
/* 6.11.3 Integer Functions */
#include <clc/integer/abs.h>
#include <clc/integer/abs_diff.h>
#include <clc/integer/add_sat.h>
/* 6.11.5 Geometric Functions */
#include <clc/geometric/cross.h>
#include <clc/geometric/length.h>
#include <clc/geometric/normalize.h>
/* 6.11.6 Relational Functions */
#include <clc/relational/select.h>
/* 6.11.8 Synchronization Functions */
#include <clc/synchronization/cl_mem_fence_flags.h>
#include <clc/synchronization/barrier.h>
#pragma OPENCL EXTENSION all : disable

View File

@ -0,0 +1,4 @@
#define _CLC_OVERLOAD __attribute__((overloadable))
#define _CLC_DECL
#define _CLC_DEF __attribute__((always_inline))
#define _CLC_INLINE __attribute__((always_inline)) static inline

View File

@ -0,0 +1,42 @@
#define _CLC_UNARY_VECTORIZE(DECLSPEC, RET_TYPE, FUNCTION, ARG1_TYPE) \
DECLSPEC RET_TYPE##2 FUNCTION(ARG1_TYPE##2 x) { \
return (RET_TYPE##2)(FUNCTION(x.x), FUNCTION(x.y)); \
} \
\
DECLSPEC RET_TYPE##3 FUNCTION(ARG1_TYPE##3 x) { \
return (RET_TYPE##3)(FUNCTION(x.x), FUNCTION(x.y), FUNCTION(x.z)); \
} \
\
DECLSPEC RET_TYPE##4 FUNCTION(ARG1_TYPE##4 x) { \
return (RET_TYPE##4)(FUNCTION(x.lo), FUNCTION(x.hi)); \
} \
\
DECLSPEC RET_TYPE##8 FUNCTION(ARG1_TYPE##8 x) { \
return (RET_TYPE##8)(FUNCTION(x.lo), FUNCTION(x.hi)); \
} \
\
DECLSPEC RET_TYPE##16 FUNCTION(ARG1_TYPE##16 x) { \
return (RET_TYPE##16)(FUNCTION(x.lo), FUNCTION(x.hi)); \
}
#define _CLC_BINARY_VECTORIZE(DECLSPEC, RET_TYPE, FUNCTION, ARG1_TYPE, ARG2_TYPE) \
DECLSPEC RET_TYPE##2 FUNCTION(ARG1_TYPE##2 x, ARG2_TYPE##2 y) { \
return (RET_TYPE##2)(FUNCTION(x.x, y.x), FUNCTION(x.y, y.y)); \
} \
\
DECLSPEC RET_TYPE##3 FUNCTION(ARG1_TYPE##3 x, ARG2_TYPE##3 y) { \
return (RET_TYPE##3)(FUNCTION(x.x, y.x), FUNCTION(x.y, y.y), \
FUNCTION(x.z, y.z)); \
} \
\
DECLSPEC RET_TYPE##4 FUNCTION(ARG1_TYPE##4 x, ARG2_TYPE##4 y) { \
return (RET_TYPE##4)(FUNCTION(x.lo, y.lo), FUNCTION(x.hi, y.hi)); \
} \
\
DECLSPEC RET_TYPE##8 FUNCTION(ARG1_TYPE##8 x, ARG2_TYPE##8 y) { \
return (RET_TYPE##8)(FUNCTION(x.lo, y.lo), FUNCTION(x.hi, y.hi)); \
} \
\
DECLSPEC RET_TYPE##16 FUNCTION(ARG1_TYPE##16 x, ARG2_TYPE##16 y) { \
return (RET_TYPE##16)(FUNCTION(x.lo, y.lo), FUNCTION(x.hi, y.hi)); \
}

View File

@ -0,0 +1,74 @@
/* 6.1.1 Built-in Scalar Data Types */
#include <stddef.h>
typedef unsigned char uchar;
typedef unsigned short ushort;
typedef unsigned int uint;
typedef unsigned long ulong;
/* 6.1.2 Built-in Vector Data Types */
typedef __attribute__((ext_vector_type(2))) char char2;
typedef __attribute__((ext_vector_type(3))) char char3;
typedef __attribute__((ext_vector_type(4))) char char4;
typedef __attribute__((ext_vector_type(8))) char char8;
typedef __attribute__((ext_vector_type(16))) char char16;
typedef __attribute__((ext_vector_type(2))) uchar uchar2;
typedef __attribute__((ext_vector_type(3))) uchar uchar3;
typedef __attribute__((ext_vector_type(4))) uchar uchar4;
typedef __attribute__((ext_vector_type(8))) uchar uchar8;
typedef __attribute__((ext_vector_type(16))) uchar uchar16;
typedef __attribute__((ext_vector_type(2))) short short2;
typedef __attribute__((ext_vector_type(3))) short short3;
typedef __attribute__((ext_vector_type(4))) short short4;
typedef __attribute__((ext_vector_type(8))) short short8;
typedef __attribute__((ext_vector_type(16))) short short16;
typedef __attribute__((ext_vector_type(2))) ushort ushort2;
typedef __attribute__((ext_vector_type(3))) ushort ushort3;
typedef __attribute__((ext_vector_type(4))) ushort ushort4;
typedef __attribute__((ext_vector_type(8))) ushort ushort8;
typedef __attribute__((ext_vector_type(16))) ushort ushort16;
typedef __attribute__((ext_vector_type(2))) int int2;
typedef __attribute__((ext_vector_type(3))) int int3;
typedef __attribute__((ext_vector_type(4))) int int4;
typedef __attribute__((ext_vector_type(8))) int int8;
typedef __attribute__((ext_vector_type(16))) int int16;
typedef __attribute__((ext_vector_type(2))) uint uint2;
typedef __attribute__((ext_vector_type(3))) uint uint3;
typedef __attribute__((ext_vector_type(4))) uint uint4;
typedef __attribute__((ext_vector_type(8))) uint uint8;
typedef __attribute__((ext_vector_type(16))) uint uint16;
typedef __attribute__((ext_vector_type(2))) long long2;
typedef __attribute__((ext_vector_type(3))) long long3;
typedef __attribute__((ext_vector_type(4))) long long4;
typedef __attribute__((ext_vector_type(8))) long long8;
typedef __attribute__((ext_vector_type(16))) long long16;
typedef __attribute__((ext_vector_type(2))) ulong ulong2;
typedef __attribute__((ext_vector_type(3))) ulong ulong3;
typedef __attribute__((ext_vector_type(4))) ulong ulong4;
typedef __attribute__((ext_vector_type(8))) ulong ulong8;
typedef __attribute__((ext_vector_type(16))) ulong ulong16;
typedef __attribute__((ext_vector_type(2))) float float2;
typedef __attribute__((ext_vector_type(3))) float float3;
typedef __attribute__((ext_vector_type(4))) float float4;
typedef __attribute__((ext_vector_type(8))) float float8;
typedef __attribute__((ext_vector_type(16))) float float16;
/* 9.3 Double Precision Floating-Point */
#ifdef cl_khr_fp64
typedef __attribute__((ext_vector_type(2))) double double2;
typedef __attribute__((ext_vector_type(3))) double double3;
typedef __attribute__((ext_vector_type(4))) double double4;
typedef __attribute__((ext_vector_type(8))) double double8;
typedef __attribute__((ext_vector_type(16))) double double16;
#endif

View File

@ -0,0 +1,51 @@
#define GENTYPE float
#include BODY
#undef GENTYPE
#define GENTYPE float2
#include BODY
#undef GENTYPE
#define GENTYPE float3
#include BODY
#undef GENTYPE
#define GENTYPE float4
#include BODY
#undef GENTYPE
#define GENTYPE float8
#include BODY
#undef GENTYPE
#define GENTYPE float16
#include BODY
#undef GENTYPE
#ifdef cl_khr_fp64
#define GENTYPE double
#include BODY
#undef GENTYPE
#define GENTYPE double2
#include BODY
#undef GENTYPE
#define GENTYPE double3
#include BODY
#undef GENTYPE
#define GENTYPE double4
#include BODY
#undef GENTYPE
#define GENTYPE double8
#include BODY
#undef GENTYPE
#define GENTYPE double16
#include BODY
#undef GENTYPE
#endif
#undef BODY

View File

@ -0,0 +1,2 @@
_CLC_OVERLOAD _CLC_DECL float3 cross(float3 p0, float3 p1);
_CLC_OVERLOAD _CLC_DECL float4 cross(float4 p0, float4 p1);

View File

@ -0,0 +1,2 @@
#define BODY <clc/geometric/distance.inc>
#include <clc/geometric/floatn.inc>

View File

@ -0,0 +1,2 @@
#define BODY <clc/geometric/dot.inc>
#include <clc/geometric/floatn.inc>

View File

@ -0,0 +1,35 @@
#define FLOATN float
#include BODY
#undef FLOATN
#define FLOATN float2
#include BODY
#undef FLOATN
#define FLOATN float3
#include BODY
#undef FLOATN
#define FLOATN float4
#include BODY
#undef FLOATN
#ifdef cl_khr_fp64
#define FLOATN double
#include BODY
#undef FLOATN
#define FLOATN double2
#include BODY
#undef FLOATN
#define FLOATN double3
#include BODY
#undef FLOATN
#define FLOATN double4
#include BODY
#undef FLOATN
#endif
#undef BODY

View File

@ -0,0 +1,2 @@
#define BODY <clc/geometric/length.inc>
#include <clc/geometric/floatn.inc>

View File

@ -0,0 +1 @@
_CLC_OVERLOAD _CLC_DECL float length(FLOATN p0);

View File

@ -0,0 +1,2 @@
#define BODY <clc/geometric/normalize.inc>
#include <clc/geometric/floatn.inc>

View File

@ -0,0 +1 @@
_CLC_OVERLOAD _CLC_DECL FLOATN normalize(FLOATN p);

View File

@ -0,0 +1,2 @@
#define BODY <clc/integer/abs.inc>
#include <clc/integer/gentype.inc>

View File

@ -0,0 +1 @@
_CLC_OVERLOAD _CLC_DECL UGENTYPE abs(GENTYPE x);

View File

@ -0,0 +1,2 @@
#define BODY <clc/integer/abs_diff.inc>
#include <clc/integer/gentype.inc>

View File

@ -0,0 +1 @@
_CLC_OVERLOAD _CLC_DECL UGENTYPE abs_diff(GENTYPE x, GENTYPE y);

View File

@ -0,0 +1,2 @@
#define BODY <clc/integer/add_sat.inc>
#include <clc/integer/gentype.inc>

View File

@ -0,0 +1 @@
_CLC_OVERLOAD _CLC_DECL GENTYPE add_sat(GENTYPE x, GENTYPE y);

View File

@ -0,0 +1,385 @@
#define GENTYPE char
#define UGENTYPE uchar
#define SGENTYPE char
#include BODY
#undef GENTYPE
#undef UGENTYPE
#undef SGENTYPE
#define GENTYPE char2
#define UGENTYPE uchar2
#define SGENTYPE char2
#include BODY
#undef GENTYPE
#undef UGENTYPE
#undef SGENTYPE
#define GENTYPE char3
#define UGENTYPE uchar3
#define SGENTYPE char3
#include BODY
#undef GENTYPE
#undef UGENTYPE
#undef SGENTYPE
#define GENTYPE char4
#define UGENTYPE uchar4
#define SGENTYPE char4
#include BODY
#undef GENTYPE
#undef UGENTYPE
#undef SGENTYPE
#define GENTYPE char8
#define UGENTYPE uchar8
#define SGENTYPE char8
#include BODY
#undef GENTYPE
#undef UGENTYPE
#undef SGENTYPE
#define GENTYPE char16
#define UGENTYPE uchar16
#define SGENTYPE char16
#include BODY
#undef GENTYPE
#undef UGENTYPE
#undef SGENTYPE
#define GENTYPE uchar
#define UGENTYPE uchar
#define SGENTYPE char
#include BODY
#undef GENTYPE
#undef UGENTYPE
#undef SGENTYPE
#define GENTYPE uchar2
#define UGENTYPE uchar2
#define SGENTYPE char2
#include BODY
#undef GENTYPE
#undef UGENTYPE
#undef SGENTYPE
#define GENTYPE uchar3
#define UGENTYPE uchar3
#define SGENTYPE char3
#include BODY
#undef GENTYPE
#undef UGENTYPE
#undef SGENTYPE
#define GENTYPE uchar4
#define UGENTYPE uchar4
#define SGENTYPE char4
#include BODY
#undef GENTYPE
#undef UGENTYPE
#undef SGENTYPE
#define GENTYPE uchar8
#define UGENTYPE uchar8
#define SGENTYPE char8
#include BODY
#undef GENTYPE
#undef UGENTYPE
#undef SGENTYPE
#define GENTYPE uchar16
#define UGENTYPE uchar16
#define SGENTYPE char16
#include BODY
#undef GENTYPE
#undef UGENTYPE
#undef SGENTYPE
#define GENTYPE short
#define UGENTYPE ushort
#define SGENTYPE short
#include BODY
#undef GENTYPE
#undef UGENTYPE
#undef SGENTYPE
#define GENTYPE short2
#define UGENTYPE ushort2
#define SGENTYPE short2
#include BODY
#undef GENTYPE
#undef UGENTYPE
#undef SGENTYPE
#define GENTYPE short3
#define UGENTYPE ushort3
#define SGENTYPE short3
#include BODY
#undef GENTYPE
#undef UGENTYPE
#undef SGENTYPE
#define GENTYPE short4
#define UGENTYPE ushort4
#define SGENTYPE short4
#include BODY
#undef GENTYPE
#undef UGENTYPE
#undef SGENTYPE
#define GENTYPE short8
#define UGENTYPE ushort8
#define SGENTYPE short8
#include BODY
#undef GENTYPE
#undef UGENTYPE
#undef SGENTYPE
#define GENTYPE short16
#define UGENTYPE ushort16
#define SGENTYPE short16
#include BODY
#undef GENTYPE
#undef UGENTYPE
#undef SGENTYPE
#define GENTYPE ushort
#define UGENTYPE ushort
#define SGENTYPE short
#include BODY
#undef GENTYPE
#undef UGENTYPE
#undef SGENTYPE
#define GENTYPE ushort2
#define UGENTYPE ushort2
#define SGENTYPE short2
#include BODY
#undef GENTYPE
#undef UGENTYPE
#undef SGENTYPE
#define GENTYPE ushort3
#define UGENTYPE ushort3
#define SGENTYPE short3
#include BODY
#undef GENTYPE
#undef UGENTYPE
#undef SGENTYPE
#define GENTYPE ushort4
#define UGENTYPE ushort4
#define SGENTYPE short4
#include BODY
#undef GENTYPE
#undef UGENTYPE
#undef SGENTYPE
#define GENTYPE ushort8
#define UGENTYPE ushort8
#define SGENTYPE short8
#include BODY
#undef GENTYPE
#undef UGENTYPE
#undef SGENTYPE
#define GENTYPE ushort16
#define UGENTYPE ushort16
#define SGENTYPE short16
#include BODY
#undef GENTYPE
#undef UGENTYPE
#undef SGENTYPE
#define GENTYPE int
#define UGENTYPE uint
#define SGENTYPE int
#include BODY
#undef GENTYPE
#undef UGENTYPE
#undef SGENTYPE
#define GENTYPE int2
#define UGENTYPE uint2
#define SGENTYPE int2
#include BODY
#undef GENTYPE
#undef UGENTYPE
#undef SGENTYPE
#define GENTYPE int3
#define UGENTYPE uint3
#define SGENTYPE int3
#include BODY
#undef GENTYPE
#undef UGENTYPE
#undef SGENTYPE
#define GENTYPE int4
#define UGENTYPE uint4
#define SGENTYPE int4
#include BODY
#undef GENTYPE
#undef UGENTYPE
#undef SGENTYPE
#define GENTYPE int8
#define UGENTYPE uint8
#define SGENTYPE int8
#include BODY
#undef GENTYPE
#undef UGENTYPE
#undef SGENTYPE
#define GENTYPE int16
#define UGENTYPE uint16
#define SGENTYPE int16
#include BODY
#undef GENTYPE
#undef UGENTYPE
#undef SGENTYPE
#define GENTYPE uint
#define UGENTYPE uint
#define SGENTYPE int
#include BODY
#undef GENTYPE
#undef UGENTYPE
#undef SGENTYPE
#define GENTYPE uint2
#define UGENTYPE uint2
#define SGENTYPE int2
#include BODY
#undef GENTYPE
#undef UGENTYPE
#undef SGENTYPE
#define GENTYPE uint3
#define UGENTYPE uint3
#define SGENTYPE int3
#include BODY
#undef GENTYPE
#undef UGENTYPE
#undef SGENTYPE
#define GENTYPE uint4
#define UGENTYPE uint4
#define SGENTYPE int4
#include BODY
#undef GENTYPE
#undef UGENTYPE
#undef SGENTYPE
#define GENTYPE uint8
#define UGENTYPE uint8
#define SGENTYPE int8
#include BODY
#undef GENTYPE
#undef UGENTYPE
#undef SGENTYPE
#define GENTYPE uint16
#define UGENTYPE uint16
#define SGENTYPE int16
#include BODY
#undef GENTYPE
#undef UGENTYPE
#undef SGENTYPE
#define GENTYPE long
#define UGENTYPE ulong
#define SGENTYPE long
#include BODY
#undef GENTYPE
#undef UGENTYPE
#undef SGENTYPE
#define GENTYPE long2
#define UGENTYPE ulong2
#define SGENTYPE long2
#include BODY
#undef GENTYPE
#undef UGENTYPE
#undef SGENTYPE
#define GENTYPE long3
#define UGENTYPE ulong3
#define SGENTYPE long3
#include BODY
#undef GENTYPE
#undef UGENTYPE
#undef SGENTYPE
#define GENTYPE long4
#define UGENTYPE ulong4
#define SGENTYPE long4
#include BODY
#undef GENTYPE
#undef UGENTYPE
#undef SGENTYPE
#define GENTYPE long8
#define UGENTYPE ulong8
#define SGENTYPE long8
#include BODY
#undef GENTYPE
#undef UGENTYPE
#undef SGENTYPE
#define GENTYPE long16
#define UGENTYPE ulong16
#define SGENTYPE long16
#include BODY
#undef GENTYPE
#undef UGENTYPE
#undef SGENTYPE
#define GENTYPE ulong
#define UGENTYPE ulong
#define SGENTYPE long
#include BODY
#undef GENTYPE
#undef UGENTYPE
#undef SGENTYPE
#define GENTYPE ulong2
#define UGENTYPE ulong2
#define SGENTYPE long2
#include BODY
#undef GENTYPE
#undef UGENTYPE
#undef SGENTYPE
#define GENTYPE ulong3
#define UGENTYPE ulong3
#define SGENTYPE long3
#include BODY
#undef GENTYPE
#undef UGENTYPE
#undef SGENTYPE
#define GENTYPE ulong4
#define UGENTYPE ulong4
#define SGENTYPE long4
#include BODY
#undef GENTYPE
#undef UGENTYPE
#undef SGENTYPE
#define GENTYPE ulong8
#define UGENTYPE ulong8
#define SGENTYPE long8
#include BODY
#undef GENTYPE
#undef UGENTYPE
#undef SGENTYPE
#define GENTYPE ulong16
#define UGENTYPE ulong16
#define SGENTYPE long16
#include BODY
#undef GENTYPE
#undef UGENTYPE
#undef SGENTYPE
#undef BODY

View File

@ -0,0 +1,6 @@
#undef cos
#define cos __clc_cos
#define FUNCTION __clc_cos
#define INTRINSIC "llvm.cos"
#include <clc/math/unary_intrin.inc>

View File

@ -0,0 +1 @@
#define native_cos cos

View File

@ -0,0 +1 @@
#define native_divide(x, y) ((x) / (y))

View File

@ -0,0 +1 @@
#define native_sin sin

View File

@ -0,0 +1 @@
#define native_sqrt sqrt

View File

@ -0,0 +1,6 @@
#undef sin
#define sin __clc_sin
#define FUNCTION __clc_sin
#define INTRINSIC "llvm.sin"
#include <clc/math/unary_intrin.inc>

View File

@ -0,0 +1,6 @@
#undef sqrt
#define sqrt __clc_sqrt
#define FUNCTION __clc_sqrt
#define INTRINSIC "llvm.sqrt"
#include <clc/math/unary_intrin.inc>

View File

@ -0,0 +1 @@
_CLC_OVERLOAD _CLC_DECL GENTYPE FUNCTION(GENTYPE x);

View File

@ -0,0 +1,18 @@
_CLC_OVERLOAD float FUNCTION(float f) __asm(INTRINSIC ".f32");
_CLC_OVERLOAD float2 FUNCTION(float2 f) __asm(INTRINSIC ".v2f32");
_CLC_OVERLOAD float3 FUNCTION(float3 f) __asm(INTRINSIC ".v3f32");
_CLC_OVERLOAD float4 FUNCTION(float4 f) __asm(INTRINSIC ".v4f32");
_CLC_OVERLOAD float8 FUNCTION(float8 f) __asm(INTRINSIC ".v8f32");
_CLC_OVERLOAD float16 FUNCTION(float16 f) __asm(INTRINSIC ".v16f32");
#ifdef cl_khr_fp64
_CLC_OVERLOAD double FUNCTION(double d) __asm(INTRINSIC ".f64");
_CLC_OVERLOAD double2 FUNCTION(double2 d) __asm(INTRINSIC ".v2f64");
_CLC_OVERLOAD double3 FUNCTION(double3 d) __asm(INTRINSIC ".v3f64");
_CLC_OVERLOAD double4 FUNCTION(double4 d) __asm(INTRINSIC ".v4f64");
_CLC_OVERLOAD double8 FUNCTION(double8 d) __asm(INTRINSIC ".v8f64");
_CLC_OVERLOAD double16 FUNCTION(double16 d) __asm(INTRINSIC ".v16f64");
#endif
#undef FUNCTION
#undef INTRINSIC

View File

@ -0,0 +1 @@
#define select(a, b, c) ((c) ? (b) : (a))

View File

@ -0,0 +1,4 @@
typedef uint cl_mem_fence_flags;
#define CLK_LOCAL_MEM_FENCE 1
#define CLK_GLOBAL_MEM_FENCE 2

View File

@ -0,0 +1,8 @@
geometric/cross.cl
geometric/dot.cl
geometric/length.cl
geometric/normalize.cl
integer/abs.cl
integer/add_sat.cl
integer/add_sat.ll
integer/add_sat_impl.ll

View File

@ -0,0 +1,11 @@
#include <clc/clc.h>
_CLC_OVERLOAD _CLC_DEF float3 cross(float3 p0, float3 p1) {
return (float3)(p0.y*p1.z - p0.z*p1.y, p0.z*p1.x - p0.x*p1.z,
p0.x*p1.y - p0.y*p1.x);
}
_CLC_OVERLOAD _CLC_DEF float4 cross(float4 p0, float4 p1) {
return (float4)(p0.y*p1.z - p0.z*p1.y, p0.z*p1.x - p0.x*p1.z,
p0.x*p1.y - p0.y*p1.x, 0.f);
}

View File

@ -0,0 +1,17 @@
#include <clc/clc.h>
_CLC_OVERLOAD _CLC_DEF float dot(float p0, float p1) {
return p0*p1;
}
_CLC_OVERLOAD _CLC_DEF float dot(float2 p0, float2 p1) {
return p0.x*p1.x + p0.y*p1.y;
}
_CLC_OVERLOAD _CLC_DEF float dot(float3 p0, float3 p1) {
return p0.x*p1.x + p0.y*p1.y + p0.z*p1.z;
}
_CLC_OVERLOAD _CLC_DEF float dot(float4 p0, float4 p1) {
return p0.x*p1.x + p0.y*p1.y + p0.z*p1.z + p0.w*p1.w;
}

View File

@ -0,0 +1,4 @@
#include <clc/clc.h>
#define BODY "length.inc"
#include <clc/geometric/floatn.inc>

View File

@ -0,0 +1,3 @@
_CLC_OVERLOAD _CLC_DEF float length(FLOATN p) {
return native_sqrt(dot(p, p));
}

View File

@ -0,0 +1,4 @@
#include <clc/clc.h>
#define BODY "normalize.inc"
#include <clc/geometric/floatn.inc>

View File

@ -0,0 +1,3 @@
_CLC_OVERLOAD _CLC_DEF FLOATN normalize(FLOATN p) {
return p/length(p);
}

View File

@ -0,0 +1,4 @@
#include <clc/clc.h>
#define BODY <abs.inc>
#include <clc/integer/gentype.inc>

View File

@ -0,0 +1,3 @@
_CLC_OVERLOAD _CLC_DEF UGENTYPE abs(GENTYPE x) {
return __builtin_astype((GENTYPE)(x > (GENTYPE)(0) ? x : -x), UGENTYPE);
}

View File

@ -0,0 +1,4 @@
#include <clc/clc.h>
#define BODY <abs_diff.inc>
#include <clc/integer/gentype.inc>

View File

@ -0,0 +1,3 @@
_CLC_OVERLOAD _CLC_DEF UGENTYPE abs_diff(GENTYPE x) {
return __builtin_astype((GENTYPE)(x > y ? x-y : y-x), UGENTYPE);
}

View File

@ -0,0 +1,52 @@
#include <clc/clc.h>
// From add_sat.ll
_CLC_DECL char __clc_add_sat_s8(char, char);
_CLC_DECL char __clc_add_sat_u8(uchar, uchar);
_CLC_DECL char __clc_add_sat_s16(short, short);
_CLC_DECL char __clc_add_sat_u16(ushort, ushort);
_CLC_DECL char __clc_add_sat_s32(int, int);
_CLC_DECL char __clc_add_sat_u32(uint, uint);
_CLC_DECL char __clc_add_sat_s64(long, long);
_CLC_DECL char __clc_add_sat_u64(ulong, ulong);
_CLC_OVERLOAD _CLC_DEF char add_sat(char x, char y) {
return __clc_add_sat_s8(x, y);
}
_CLC_OVERLOAD _CLC_DEF uchar add_sat(uchar x, uchar y) {
return __clc_add_sat_u8(x, y);
}
_CLC_OVERLOAD _CLC_DEF short add_sat(short x, short y) {
return __clc_add_sat_s16(x, y);
}
_CLC_OVERLOAD _CLC_DEF ushort add_sat(ushort x, ushort y) {
return __clc_add_sat_u16(x, y);
}
_CLC_OVERLOAD _CLC_DEF int add_sat(int x, int y) {
return __clc_add_sat_s32(x, y);
}
_CLC_OVERLOAD _CLC_DEF uint add_sat(uint x, uint y) {
return __clc_add_sat_u32(x, y);
}
_CLC_OVERLOAD _CLC_DEF long add_sat(long x, long y) {
return __clc_add_sat_s64(x, y);
}
_CLC_OVERLOAD _CLC_DEF ulong add_sat(ulong x, ulong y) {
return __clc_add_sat_u64(x, y);
}
_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, char, add_sat, char, char)
_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, uchar, add_sat, uchar, uchar)
_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, short, add_sat, short, short)
_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, ushort, add_sat, ushort, ushort)
_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, int, add_sat, int, int)
_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, uint, add_sat, uint, uint)
_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, long, add_sat, long, long)
_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, ulong, add_sat, ulong, ulong)

View File

@ -0,0 +1,55 @@
declare i8 @__clc_add_sat_impl_s8(i8 %x, i8 %y)
define linkonce_odr i8 @__clc_add_sat_s8(i8 %x, i8 %y) nounwind readnone alwaysinline {
%call = call i8 @__clc_add_sat_impl_s8(i8 %x, i8 %y)
ret i8 %call
}
declare i8 @__clc_add_sat_impl_u8(i8 %x, i8 %y)
define linkonce_odr i8 @__clc_add_sat_u8(i8 %x, i8 %y) nounwind readnone alwaysinline {
%call = call i8 @__clc_add_sat_impl_u8(i8 %x, i8 %y)
ret i8 %call
}
declare i16 @__clc_add_sat_impl_s16(i16 %x, i16 %y)
define linkonce_odr i16 @__clc_add_sat_s16(i16 %x, i16 %y) nounwind readnone alwaysinline {
%call = call i16 @__clc_add_sat_impl_s16(i16 %x, i16 %y)
ret i16 %call
}
declare i16 @__clc_add_sat_impl_u16(i16 %x, i16 %y)
define linkonce_odr i16 @__clc_add_sat_u16(i16 %x, i16 %y) nounwind readnone alwaysinline {
%call = call i16 @__clc_add_sat_impl_u16(i16 %x, i16 %y)
ret i16 %call
}
declare i32 @__clc_add_sat_impl_s32(i32 %x, i32 %y)
define linkonce_odr i32 @__clc_add_sat_s32(i32 %x, i32 %y) nounwind readnone alwaysinline {
%call = call i32 @__clc_add_sat_impl_s32(i32 %x, i32 %y)
ret i32 %call
}
declare i32 @__clc_add_sat_impl_u32(i32 %x, i32 %y)
define linkonce_odr i32 @__clc_add_sat_u32(i32 %x, i32 %y) nounwind readnone alwaysinline {
%call = call i32 @__clc_add_sat_impl_u32(i32 %x, i32 %y)
ret i32 %call
}
declare i64 @__clc_add_sat_impl_s64(i64 %x, i64 %y)
define linkonce_odr i64 @__clc_add_sat_s64(i64 %x, i64 %y) nounwind readnone alwaysinline {
%call = call i64 @__clc_add_sat_impl_s64(i64 %x, i64 %y)
ret i64 %call
}
declare i64 @__clc_add_sat_impl_u64(i64 %x, i64 %y)
define linkonce_odr i64 @__clc_add_sat_u64(i64 %x, i64 %y) nounwind readnone alwaysinline {
%call = call i64 @__clc_add_sat_impl_u64(i64 %x, i64 %y)
ret i64 %call
}

View File

@ -0,0 +1,83 @@
declare {i8, i1} @llvm.sadd.with.overflow.i8(i8, i8)
declare {i8, i1} @llvm.uadd.with.overflow.i8(i8, i8)
define linkonce_odr i8 @__clc_add_sat_impl_s8(i8 %x, i8 %y) nounwind readnone alwaysinline {
%call = call {i8, i1} @llvm.sadd.with.overflow.i8(i8 %x, i8 %y)
%res = extractvalue {i8, i1} %call, 0
%over = extractvalue {i8, i1} %call, 1
%x.msb = ashr i8 %x, 7
%x.limit = xor i8 %x.msb, 127
%sat = select i1 %over, i8 %x.limit, i8 %res
ret i8 %sat
}
define linkonce_odr i8 @__clc_add_sat_impl_u8(i8 %x, i8 %y) nounwind readnone alwaysinline {
%call = call {i8, i1} @llvm.uadd.with.overflow.i8(i8 %x, i8 %y)
%res = extractvalue {i8, i1} %call, 0
%over = extractvalue {i8, i1} %call, 1
%sat = select i1 %over, i8 -1, i8 %res
ret i8 %sat
}
declare {i16, i1} @llvm.sadd.with.overflow.i16(i16, i16)
declare {i16, i1} @llvm.uadd.with.overflow.i16(i16, i16)
define linkonce_odr i16 @__clc_add_sat_impl_s16(i16 %x, i16 %y) nounwind readnone alwaysinline {
%call = call {i16, i1} @llvm.sadd.with.overflow.i16(i16 %x, i16 %y)
%res = extractvalue {i16, i1} %call, 0
%over = extractvalue {i16, i1} %call, 1
%x.msb = ashr i16 %x, 15
%x.limit = xor i16 %x.msb, 32767
%sat = select i1 %over, i16 %x.limit, i16 %res
ret i16 %sat
}
define linkonce_odr i16 @__clc_add_sat_impl_u16(i16 %x, i16 %y) nounwind readnone alwaysinline {
%call = call {i16, i1} @llvm.uadd.with.overflow.i16(i16 %x, i16 %y)
%res = extractvalue {i16, i1} %call, 0
%over = extractvalue {i16, i1} %call, 1
%sat = select i1 %over, i16 -1, i16 %res
ret i16 %sat
}
declare {i32, i1} @llvm.sadd.with.overflow.i32(i32, i32)
declare {i32, i1} @llvm.uadd.with.overflow.i32(i32, i32)
define linkonce_odr i32 @__clc_add_sat_impl_s32(i32 %x, i32 %y) nounwind readnone alwaysinline {
%call = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %x, i32 %y)
%res = extractvalue {i32, i1} %call, 0
%over = extractvalue {i32, i1} %call, 1
%x.msb = ashr i32 %x, 31
%x.limit = xor i32 %x.msb, 2147483647
%sat = select i1 %over, i32 %x.limit, i32 %res
ret i32 %sat
}
define linkonce_odr i32 @__clc_add_sat_impl_u32(i32 %x, i32 %y) nounwind readnone alwaysinline {
%call = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %x, i32 %y)
%res = extractvalue {i32, i1} %call, 0
%over = extractvalue {i32, i1} %call, 1
%sat = select i1 %over, i32 -1, i32 %res
ret i32 %sat
}
declare {i64, i1} @llvm.sadd.with.overflow.i64(i64, i64)
declare {i64, i1} @llvm.uadd.with.overflow.i64(i64, i64)
define linkonce_odr i64 @__clc_add_sat_impl_s64(i64 %x, i64 %y) nounwind readnone alwaysinline {
%call = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %x, i64 %y)
%res = extractvalue {i64, i1} %call, 0
%over = extractvalue {i64, i1} %call, 1
%x.msb = ashr i64 %x, 63
%x.limit = xor i64 %x.msb, 9223372036854775807
%sat = select i1 %over, i64 %x.limit, i64 %res
ret i64 %sat
}
define linkonce_odr i64 @__clc_add_sat_impl_u64(i64 %x, i64 %y) nounwind readnone alwaysinline {
%call = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %x, i64 %y)
%res = extractvalue {i64, i1} %call, 0
%over = extractvalue {i64, i1} %call, 1
%sat = select i1 %over, i64 -1, i64 %res
ret i64 %sat
}

View File

@ -0,0 +1,6 @@
_CLC_INLINE void barrier(cl_mem_fence_flags flags) {
if (flags & CLK_LOCAL_MEM_FENCE) {
__builtin_ptx_bar_sync(0);
}
}

View File

@ -0,0 +1,8 @@
_CLC_INLINE size_t get_global_id(uint dim) {
switch (dim) {
case 0: return __builtin_ptx_read_ctaid_x()*__builtin_ptx_read_ntid_x()+__builtin_ptx_read_tid_x();
case 1: return __builtin_ptx_read_ctaid_y()*__builtin_ptx_read_ntid_y()+__builtin_ptx_read_tid_y();
case 2: return __builtin_ptx_read_ctaid_z()*__builtin_ptx_read_ntid_z()+__builtin_ptx_read_tid_z();
default: return 0;
}
}

View File

@ -0,0 +1,8 @@
_CLC_INLINE size_t get_global_size(uint dim) {
switch (dim) {
case 0: return __builtin_ptx_read_nctaid_x()*__builtin_ptx_read_ntid_x();
case 1: return __builtin_ptx_read_nctaid_y()*__builtin_ptx_read_ntid_y();
case 2: return __builtin_ptx_read_nctaid_z()*__builtin_ptx_read_ntid_z();
default: return 0;
}
}

View File

@ -0,0 +1,8 @@
_CLC_INLINE size_t get_group_id(uint dim) {
switch (dim) {
case 0: return __builtin_ptx_read_ctaid_x();
case 1: return __builtin_ptx_read_ctaid_y();
case 2: return __builtin_ptx_read_ctaid_z();
default: return 0;
}
}

View File

@ -0,0 +1,8 @@
_CLC_INLINE size_t get_local_id(uint dim) {
switch (dim) {
case 0: return __builtin_ptx_read_tid_x();
case 1: return __builtin_ptx_read_tid_y();
case 2: return __builtin_ptx_read_tid_z();
default: return 0;
}
}

View File

@ -0,0 +1,8 @@
_CLC_INLINE size_t get_local_size(uint dim) {
switch (dim) {
case 0: return __builtin_ptx_read_ntid_x();
case 1: return __builtin_ptx_read_ntid_y();
case 2: return __builtin_ptx_read_ntid_z();
default: return 0;
}
}

View File

@ -0,0 +1,8 @@
_CLC_INLINE size_t get_num_groups(uint dim) {
switch (dim) {
case 0: return __builtin_ptx_read_nctaid_x();
case 1: return __builtin_ptx_read_nctaid_y();
case 2: return __builtin_ptx_read_nctaid_z();
default: return 0;
}
}

View File

1
libclc/ptx/lib/SOURCES Normal file
View File

@ -0,0 +1 @@
integer/add_sat.ll

View File

@ -0,0 +1,55 @@
declare i8 @__clc_add_sat_impl_s8(i8 %x, i8 %y)
define linkonce_odr ptx_device i8 @__clc_add_sat_s8(i8 %x, i8 %y) nounwind readnone alwaysinline {
%call = call i8 @__clc_add_sat_impl_s8(i8 %x, i8 %y)
ret i8 %call
}
declare i8 @__clc_add_sat_impl_u8(i8 %x, i8 %y)
define linkonce_odr ptx_device i8 @__clc_add_sat_u8(i8 %x, i8 %y) nounwind readnone alwaysinline {
%call = call i8 @__clc_add_sat_impl_u8(i8 %x, i8 %y)
ret i8 %call
}
declare i16 @__clc_add_sat_impl_s16(i16 %x, i16 %y)
define linkonce_odr ptx_device i16 @__clc_add_sat_s16(i16 %x, i16 %y) nounwind readnone alwaysinline {
%call = call i16 @__clc_add_sat_impl_s16(i16 %x, i16 %y)
ret i16 %call
}
declare i16 @__clc_add_sat_impl_u16(i16 %x, i16 %y)
define linkonce_odr ptx_device i16 @__clc_add_sat_u16(i16 %x, i16 %y) nounwind readnone alwaysinline {
%call = call i16 @__clc_add_sat_impl_u16(i16 %x, i16 %y)
ret i16 %call
}
declare i32 @__clc_add_sat_impl_s32(i32 %x, i32 %y)
define linkonce_odr ptx_device i32 @__clc_add_sat_s32(i32 %x, i32 %y) nounwind readnone alwaysinline {
%call = call i32 @__clc_add_sat_impl_s32(i32 %x, i32 %y)
ret i32 %call
}
declare i32 @__clc_add_sat_impl_u32(i32 %x, i32 %y)
define linkonce_odr ptx_device i32 @__clc_add_sat_u32(i32 %x, i32 %y) nounwind readnone alwaysinline {
%call = call i32 @__clc_add_sat_impl_u32(i32 %x, i32 %y)
ret i32 %call
}
declare i64 @__clc_add_sat_impl_s64(i64 %x, i64 %y)
define linkonce_odr ptx_device i64 @__clc_add_sat_s64(i64 %x, i64 %y) nounwind readnone alwaysinline {
%call = call i64 @__clc_add_sat_impl_s64(i64 %x, i64 %y)
ret i64 %call
}
declare i64 @__clc_add_sat_impl_u64(i64 %x, i64 %y)
define linkonce_odr ptx_device i64 @__clc_add_sat_u64(i64 %x, i64 %y) nounwind readnone alwaysinline {
%call = call i64 @__clc_add_sat_impl_u64(i64 %x, i64 %y)
ret i64 %call
}

3
libclc/test/cos.cl Normal file
View File

@ -0,0 +1,3 @@
__kernel void foo(float4 *f) {
*f = cos(*f);
}

3
libclc/test/cross.cl Normal file
View File

@ -0,0 +1,3 @@
__kernel void foo(float4 *f) {
*f = cross(f[0], f[1]);
}

View File

@ -0,0 +1,3 @@
__kernel void foo(int *i) {
i[get_group_id(0)] = 1;
}

View File

@ -0,0 +1,81 @@
#include "llvm/ADT/OwningPtr.h"
#include "llvm/Bitcode/ReaderWriter.h"
#include "llvm/Function.h"
#include "llvm/GlobalVariable.h"
#include "llvm/LLVMContext.h"
#include "llvm/Module.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/system_error.h"
#include "llvm/Support/ToolOutputFile.h"
using namespace llvm;
static cl::opt<std::string>
InputFilename(cl::Positional, cl::desc("<input bitcode>"), cl::init("-"));
static cl::opt<std::string>
OutputFilename("o", cl::desc("Output filename"),
cl::value_desc("filename"));
int main(int argc, char **argv) {
LLVMContext &Context = getGlobalContext();
llvm_shutdown_obj Y; // Call llvm_shutdown() on exit.
cl::ParseCommandLineOptions(argc, argv, "libclc builtin preparation tool\n");
std::string ErrorMessage;
std::auto_ptr<Module> M;
{
OwningPtr<MemoryBuffer> BufferPtr;
if (error_code ec = MemoryBuffer::getFileOrSTDIN(InputFilename, BufferPtr))
ErrorMessage = ec.message();
else
M.reset(ParseBitcodeFile(BufferPtr.get(), Context, &ErrorMessage));
}
if (M.get() == 0) {
errs() << argv[0] << ": ";
if (ErrorMessage.size())
errs() << ErrorMessage << "\n";
else
errs() << "bitcode didn't read correctly.\n";
return 1;
}
// Set linkage of every external definition to linkonce_odr.
for (Module::iterator i = M->begin(), e = M->end(); i != e; ++i) {
if (!i->isDeclaration() && i->getLinkage() == GlobalValue::ExternalLinkage)
i->setLinkage(GlobalValue::LinkOnceODRLinkage);
}
for (Module::global_iterator i = M->global_begin(), e = M->global_end();
i != e; ++i) {
if (!i->isDeclaration() && i->getLinkage() == GlobalValue::ExternalLinkage)
i->setLinkage(GlobalValue::LinkOnceODRLinkage);
}
if (OutputFilename.empty()) {
errs() << "no output file\n";
return 1;
}
std::string ErrorInfo;
OwningPtr<tool_output_file> Out
(new tool_output_file(OutputFilename.c_str(), ErrorInfo,
raw_fd_ostream::F_Binary));
if (!ErrorInfo.empty()) {
errs() << ErrorInfo << '\n';
exit(1);
}
WriteBitcodeToFile(M.get(), Out->os());
// Declare success.
Out->keep();
return 0;
}

53
libclc/www/index.html Normal file
View File

@ -0,0 +1,53 @@
<html>
<head>
<title>libclc</title>
</head>
<body>
<h1>libclc</h1>
<p>
libclc is an open source, BSD licensed
implementation of the library requirements of the
OpenCL C programming language, as specified by the <a
href="http://www.khronos.org/registry/cl/specs/opencl-1.1.pdf">OpenCL
1.1 Specification</a>. The following sections of the specification
impose library requirements:
<ul>
<li>6.1: Supported Data Types
<li>6.2.3: Explicit Conversions
<li>6.2.4.2: Reinterpreting Types Using as_type() and as_typen()
<li>6.9: Preprocessor Directives and Macros
<li>6.11: Built-in Functions
<li>9.3: Double Precision Floating-Point
<li>9.4: 64-bit Atomics
<li>9.5: Writing to 3D image memory objects
<li>9.6: Half Precision Floating-Point
</ul>
</p>
<p>
libclc is intended to be used with the <a href="http://clang.llvm.org/">Clang</a>
compiler's OpenCL frontend.
</p>
<p>
libclc is designed to be portable and extensible. To this end,
it provides generic implementations of most library requirements,
allowing the target to override the generic implementation at the
granularity of individual functions.
</p>
<p>
libclc currently only supports the PTX target, but support for more
targets is welcome.
</p>
<h2>Download</h2>
git clone git://git.pcc.me.uk/~peter/libclc.git (<a href="http://git.pcc.me.uk/~peter/libclc.git">gitweb</a>)
<h2>Mailing List</h2>
libclc-dev@pcc.me.uk (<a href="http://www.pcc.me.uk/cgi-bin/mailman/listinfo/libclc-dev">subscribe/unsubscribe</a>, <a href="http://www.pcc.me.uk/pipermail/libclc-dev/">archives</a>)
</body>
</html>