mirror of
https://github.com/RPCSX/xed.git
synced 2024-11-23 11:39:40 +00:00
e7d734962c
Change-Id: I166833daaa56c33eca01bdf7b9aa6e74a490ba9a (cherry picked from commit 1212ba962dff6dfbfa0bd2469327ff447ce59058)
389 lines
14 KiB
Python
Executable File
389 lines
14 KiB
Python
Executable File
#BEGIN_LEGAL
|
|
#
|
|
#Copyright (c) 2017 Intel Corporation
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
#
|
|
#END_LEGAL
|
|
|
|
import re
|
|
import ild_nt
|
|
import mbuild
|
|
import genutil
|
|
import ildutil
|
|
import codegen
|
|
import ild_eosz
|
|
import ild_info
|
|
import ild_codegen
|
|
import operand_storage
|
|
|
|
# Name of the operand (token) whose width this module resolves.
_imm_token = 'IMM_WIDTH'
# Member name handed to operand_storage.get_op_setter_fn() and to the
# L2 / lookup-table generators.
_ild_t_imm_member = 'imm_width'

# The second-immediate NT. Per the get_imm_nt_seq() notes it does not
# bind IMM_WIDTH and is therefore special-cased throughout this module.
_uimm1_nt = 'UIMM8_1'

# Names of the generated files, grouped by lookup level.
_l3_header_fn = 'xed-ild-imm-l3.h'
_l3_c_fn = 'xed-ild-imm-l3.c'

_l2_header_fn = 'xed-ild-imm-l2.h'
# NOTE(review): not referenced in the visible part of this file.
_l2_c_fn = 'xed-ild-imm-l2.c'

# This constant used to be assigned twice with the same value; a single
# definition is kept.
_l1_header_fn = 'xed-ild-imm-l1.h'

# NOTE(review): not referenced in the visible part of this file.
_eosz_const_suffix = 'CONST'

# Name of the L2 function used for patterns that do not set IMM_WIDTH
# (see _gen_imm0_function).
_imm0_fn = 'xed_lookup_function_0_IMM_WIDTH_CONST_l2'

# Header that receives the dumped (map, opcode) lookup tables.
_imm_lu_header_fn = 'xed-ild-imm-bytes.h'
|
|
|
|
|
|
|
|
def get_imm_nt_seq(ptrn_wrds, imm_nts):
    """Return the immediate NTs that occur in an instruction pattern.

    Usually a pattern has a single NT for IMM_WIDTH.  ENTER, EXTRQ and
    INSERTQ are the exception: their patterns carry two immediate NTs,
    the second one being UIMM8_1, which does not bind IMM_WIDTH.  It is
    therefore added to the searched names explicitly.  Tracking the
    captured operands would be a more generic way to find it, but that
    is more complicated and not worth it for a single rare NT.

    @param ptrn_wrds: list of tokens in the instruction's pattern
    @type ptrn_wrds: [string]

    @param imm_nts: list of names of IMM_WIDTH-binding NTs
    @type imm_nts: [string]

    @return nt_names: result of ild_nt.get_nt_seq() for the pattern,
    searching for imm_nts plus UIMM8_1
    @type nt_names: [string]
    """
    # Copy first so the caller's collection is never modified.
    searched_nts = list(imm_nts)
    searched_nts.append(_uimm1_nt)
    return ild_nt.get_nt_seq(ptrn_wrds, searched_nts)
|
|
|
|
|
|
|
|
def get_all_imm_seq(united_lookup):
    """Collect every distinct IMM-binding NT sequence used by the ISA.

    @param united_lookup: lookup of ild_info.ild_info_t objects
    representing the current ISA. This lookup should have been built
    from storage+grammar
    @type united_lookup: ild_info.ild_storage_t

    @return seq_list: all distinct IMM NT sequences found in
    united_lookup, each one as a list, in sorted order
    @type seq_list: [ [string] ]
    """
    # Lists are unhashable, so de-duplicate on tuples.
    unique_seqs = set(tuple(info.imm_nt_seq)
                      for info in united_lookup.get_all_infos())
    # Set iteration order of string tuples varies between interpreter
    # runs; sorting keeps the result (and anything printed from it)
    # reproducible.  Convert back to lists so callers get what they
    # put in.
    return [list(seq) for seq in sorted(unique_seqs)]
|
|
|
|
|
|
def get_imm_binding_nts(agi):
    """Return the names of the grammar NTs that bind IMM_WIDTH.

    @param agi: all generator info object. main data structure of
    generator.

    @return nt_list: names reported by ild_nt.get_setting_nts() for the
    IMM_WIDTH operand, with the 'ONE' NT left out
    @type nt_list: [string]
    """
    # FIXME: remove ONE nt from grammar; until then it is skipped here.
    return [nt_name
            for nt_name in ild_nt.get_setting_nts(agi, _imm_token)
            if nt_name != 'ONE']
|
|
|
|
|
|
def get_target_opname():
    """Return the name of the operand this module targets.

    @return opname: name of the IMM operand - IMM_WIDTH
    @type opname: string
    """
    opname = _imm_token
    return opname
|
|
|
|
|
|
def get_l2_fn_from_info(info, imm_dict):
    """Return the L2 lookup function for one info object.

    @param info: info object whose imm_nt_seq (and, for non-constant
    functions, eosz_nt_seq) select the function
    @type info: ild_info.ild_info_t

    @param imm_dict: dictionary from IMM-NT names to the arrays
    describing those NTs
    @type imm_dict: { string(nt_name) : codegen.array_t(nt_arr) }

    @return: whatever ild_codegen.get_l2_fn() yields; the rest of this
    module treats it as the L2 function name
    """
    imm_seq = info.imm_nt_seq
    if ild_codegen.is_constant_l2_func(imm_seq, imm_dict):
        # Constant functions do not depend on EOSZ, so no EOSZ sequence
        # or EOSZ operand name is passed.
        return ild_codegen.get_l2_fn(imm_seq, _imm_token, [], None,
                                     _imm0_fn, True)
    return ild_codegen.get_l2_fn(imm_seq, _imm_token,
                                 info.eosz_nt_seq,
                                 ild_eosz.get_target_opname(),
                                 _imm0_fn, False)
|
|
|
|
|
|
|
|
def _gen_imm0_function(agi):
    """Build the L2 function for patterns that don't set IMM_WIDTH.

    Those patterns have has_im==0, so the generated function simply
    calls the imm_width setter with 0.  It is declared static inline,
    returns void and takes a single ildutil.ild_c_type argument.

    @param agi: all generator info object (not used by this function)

    @return: codegen.function_object_t describing the function
    """
    arg_name = 'x'
    imm0_fo = codegen.function_object_t(_imm0_fn, 'void',
                                        static=True, inline=True)
    imm0_fo.add_arg(ildutil.ild_c_type + ' %s' % arg_name)
    imm_setter = operand_storage.get_op_setter_fn(_ild_t_imm_member)
    imm0_fo.add_code_eol('%s(%s, %s)' % (imm_setter, arg_name, '0'))
    return imm0_fo
|
|
|
|
|
|
def _is_imm_conflict(info_list, imm_dict):
    """Tell whether the info objects disagree on the imm_bytes value.

    Info objects may differ in their L2 function while those functions
    still return the same value.  For example, the L2 functions defined
    by the UIMM8() and SIMM8() NTs have different names, but both are
    const functions returning 8; such a pair is not a conflict.

    Every element is compared against the first element of the list.

    @param info_list: list of info objects to check
    @type info_list: [ild_info.ild_info_t]

    @param imm_dict: dictionary from IMM-NT names to corresponding
    codegen.array_t objects describing those NTs
    @type imm_dict: { string(nt_name) : codegen.array_t(nt_arr) }

    @return: True|False - if there is a conflict in lookup function
    """
    if len(info_list) <= 1:
        return False

    head = info_list[0]
    head_fn = get_l2_fn_from_info(head, imm_dict)

    for other in info_list[1:]:
        if get_l2_fn_from_info(other, imm_dict) == head_fn:
            # Same L2 function, nothing to resolve for this pair.
            continue

        head_seq = head.imm_nt_seq
        other_seq = other.imm_nt_seq

        # Double-immediate patterns: the functions differ, hence
        # conflict.
        if len(head_seq) > 1 or len(other_seq) > 1:
            return True
        if len(head_seq) != len(other_seq):
            return True

        # Const L3 functions (SIMM8, UIMM8, etc.) return one value
        # only.  If both sides return the same single value they
        # should not conflict.
        head_arr = imm_dict[head_seq[0]]
        other_arr = imm_dict[other_seq[0]]
        head_vals = head_arr.get_values_space()
        other_vals = other_arr.get_values_space()
        both_const = len(head_vals) == 1 and len(other_vals) == 1
        if not (both_const and head_vals[0] == other_vals[0]):
            return True

    return False
|
|
|
|
# FIXME: write a good comment about conflict resolution in eosz and imm.
#
# Conflict resolution functions, tried in order when info objects that
# share a map-opcode conflict with each other.  For example map 0,
# opcode c7 has the xbegin and mov instructions with different
# immediate NTs - SIMMz for mov and no imm for xbegin - and the REG
# field decides which lookup function to use.
_resolution_functions = [
    # it seems that one resolution function is enough
    ild_codegen.gen_l1_byreg_resolution_function,
]
|
|
|
|
# Hard-coded L1 functions for the second-immediate instructions.
# The AMD second-immediate instructions also define conflicts that are
# not easy to resolve, so hard-coding them here kills two birds with
# one stone.
harcoded_res_functions = {
    # (map, opcode): L1 function name
    ('0x0F', '0x78'): 'xed_ild_hasimm_map0x0F_op0x78_l1',
    ('0x0', '0xc8'): 'xed_ild_hasimm_map0x0_op0xc8_l1',
}
|
|
|
|
def _resolve_conflicts(agi, info_list, imm_dict):
    """Apply the functions in _resolution_functions until one succeeds.

    @param agi: all generator info object, passed through to the
    resolution functions

    @param info_list: list of info objects that have a conflict
    @type info_list: [ild_info.ild_info_t]

    @param imm_dict: dictionary from IMM-NT names to corresponding
    codegen.array_t objects describing those NTs
    @type imm_dict: { string(nt_name) : codegen.array_t(nt_arr) }

    @return: codegen.function_object_t defining the conflict resolution
    (L1) function for info_list's map-opcode, or None when no
    resolution function produced one
    """
    # FIXME: ild_cdict.constraint_dict_t could be used for resolving
    # conflicts; it would work for any patterns (for now resolution is
    # attempted by the REG operand only).
    for resolver in _resolution_functions:
        l1_fo = resolver(agi, info_list, imm_dict, _is_imm_conflict,
                         get_l2_fn_from_info, _imm_token)
        if not l1_fo:
            continue
        return l1_fo
    return None
|
|
|
|
def gen_l1_functions_and_lookup(agi, united_lookup, imm_dict):
    """Compute the L1 (conflict resolution) functions and the imm_bytes
    lookup tables.

    All 256 opcodes of every map returned by ild_info.get_dump_maps()
    get an entry in the lookup.

    @param agi: all generators info

    @param united_lookup: the 2D lookup by map-opcode to info objects
    list. united_lookup['0x0']['0x78'] == [ild_info1, ild_info2, ... ]
    @type united_lookup:
    {string(insn_map) : {string(opcode): [ild_info.ild_info_t]} }

    @param imm_dict: dictionary from IMM-NT names to the arrays
    describing those NTs

    @return: tuple (list of generated L1 function objects,
    {insn_map: {hex opcode: L1 function name}}), or None when no L2
    function could be determined for a conflict-free map-opcode
    """
    resolution_fos = []
    lookup = {}
    for insn_map in ild_info.get_dump_maps():
        opcode_row = {}
        lookup[insn_map] = opcode_row
        for opcode in range(0, 256):
            opcode_hex = hex(opcode)

            # Look in the hard-coded resolution functions first; they
            # are manually written for the two-immediates instructions.
            hardcoded_key = (insn_map, opcode_hex)
            if hardcoded_key in harcoded_res_functions:
                opcode_row[opcode_hex] = harcoded_res_functions[hardcoded_key]
                continue

            # Only the info objects with minimum priority matter.
            candidates = ild_info.get_min_prio_list(
                united_lookup.get_info_list(insn_map, opcode_hex))
            has_conflict = _is_imm_conflict(candidates, imm_dict)

            if len(candidates) == 0:
                # If the map-opcode pair is undefined the lookup
                # function ptr is NULL.  This happens for opcodes like
                # 0F in the 0F map - totally illegal opcodes that
                # should never be looked up at runtime.
                l1_fn = '(%s)0' % (ildutil.l1_ptr_typename)
            elif len(candidates) > 1 and has_conflict:
                l1_fo = _resolve_conflicts(agi, candidates, imm_dict)
                if not l1_fo:
                    ildutil.ild_err('FAILED TO GENERATE L1 CONFLICT ' +
                        'RESOLUTION FUNCTION FOR IMM\n infos:\n %s' %
                        "\n".join([str(info) for info in candidates]))

                resolution_fos.append(l1_fo)
                l1_fn = l1_fo.function_name
            else:
                # There are no conflicts, so the L2 function can serve
                # as the L1 function.
                l1_fn = get_l2_fn_from_info(candidates[0], imm_dict)
                if not l1_fn:
                    return None
            opcode_row[opcode_hex] = l1_fn
    return resolution_fos, lookup
|
|
|
|
def _filter_uimm1_nt(imm_nt_names):
    """Return a new list of the given NT names without UIMM8_1."""
    return [nt_name for nt_name in imm_nt_names if nt_name != _uimm1_nt]
|
|
|
|
|
|
def work(agi, united_lookup, imm_nts, ild_gendir, eosz_dict,
         debug):
    """Main entry point of the module.

    Generates the L3, L2 and L1 immediate-width lookup functions and
    dumps the (map, opcode) lookup tables.

    @param agi: all generator info object. main data structure of
    generator.
    @param united_lookup: lookup of ild_info.ild_info_t objects
    representing the current ISA
    @param imm_nts: names of the IMM-binding NTs (UIMM8_1 may be among
    them; it is skipped when L3 functions are generated)
    @type imm_nts: [string]
    @param ild_gendir: not used by this function
    @param eosz_dict: passed to ild_codegen.gen_l2_func_list() together
    with the dictionary of IMM NT arrays
    @param debug: object with a write() method, receives debug output
    """
    # L3 / Level3 functions: lookup functions for each NT.
    # Every NT that changes IMM_WIDTH defines a L3 function.
    # For example the SIMM8() NT defines a L3 function that returns 1
    # (1 byte), and the SIMMv() NT defines a function that gets EOSZ
    # and returns the IMM_WIDTH value depending on EOSZ.
    #
    # UIMM8_1 doesn't bind the IMM_WIDTH operand; it is a special case
    # and there is nothing to generate for it.
    nt_dict = {}
    for nt_name in _filter_uimm1_nt(imm_nts):
        array = ild_nt.gen_nt_lookup(agi, nt_name, _imm_token,
                                     target_type=ildutil.ild_c_op_type,
                                     level='l3')
        nt_dict[nt_name] = array

    # Create the function that calls all initialization functions
    # for L3.
    init_f = ild_nt.gen_init_function(list(nt_dict.values()),
                                      'xed_ild_imm_l3_init')

    # Dump the L3 functions.
    ild_nt.dump_lu_arrays(agi, list(nt_dict.values()), _l3_c_fn,
                          mbuild.join('include-private', _l3_header_fn),
                          init_f)

    # Get all IMM NT sequences that are used in patterns.
    # The only case of an IMM sequence is when we have the UIMM8_1 NT -
    # the second immediate NT.
    all_imm_seq = get_all_imm_seq(united_lookup)
    debug.write('IMM SEQS: %s\n' % all_imm_seq)

    # L2 / Level2 functions: set imm_width.
    # These functions compute the EOSZ value (using one of the
    # EOSZ-resolution functions) and then use one of the L3 functions
    # (those that need EOSZ) to set IMM_WIDTH.
    #
    # The names of these functions should be something like
    # xed_ild_SIMMz_OSZ_NONTERM_DF64 - to define the imm-binding
    # nonterm and the EOSZ-resolution NT sequence.
    # L2 functions are defined by a single ild_info_t object - by its
    # eosz_nt_seq and imm_nt_seq.
    l2_functions = ild_codegen.gen_l2_func_list(agi, nt_dict, eosz_dict,
                                                _ild_t_imm_member)
    # Append the function for imm_bytes==0.
    l2_functions.append(_gen_imm0_function(agi))

    l2_headers = [ild_eosz.get_ntseq_header_fn(),
                  _l3_header_fn, ildutil.ild_header,
                  operand_storage.get_operand_accessors_fn()]
    ild_codegen.dump_flist_2_header(agi, _l2_header_fn, l2_headers,
                                    l2_functions)

    # L1 / Level1 functions:
    # These functions resolve conflicts (if any) using the modrm.reg
    # bits and that way decide which L2 function to call to set the
    # IMM value.  They are the values of the map,opcode lookup tables.
    #
    # They have to be dumped after looking at the united_lookup
    # mapping, in order to know what conflicts exist and to create a
    # resolution lookup table for each conflict.
    #
    # L1 functions are defined by a list of ild_info_t objects that
    # have the same map,opcode.
    res = gen_l1_functions_and_lookup(agi, united_lookup, nt_dict)
    if res is None:
        # gen_l1_functions_and_lookup() returns None when it cannot
        # find an L2 function for some map-opcode.  Report that instead
        # of failing on the tuple unpacking below.
        # NOTE(review): ild_err is presumably fatal (its other call
        # site in this module relies on that) - confirm.
        ildutil.ild_err('FAILED TO GENERATE L1 FUNCTIONS AND ' +
                        'LOOKUP FOR IMM')

    l1_functions, l1_lookup = res

    ild_codegen.dump_flist_2_header(agi, _l1_header_fn, [_l2_header_fn],
                                    l1_functions)

    headers = [_l1_header_fn, ildutil.ild_private_header,
               operand_storage.get_operand_accessors_fn()]
    ild_codegen.dump_lookup(agi, l1_lookup, _ild_t_imm_member,
                            _imm_lu_header_fn, headers,
                            ildutil.l1_ptr_typename)
|
|
|