I am here to suggest a feature and to ask whether there is a more elegant way to do what I have done. I wanted to remove the text “leaf=” from the leaf nodes and color-code those nodes based on percentiles of the predictions. Below is my code:
First, find the distribution of the leaf-node values:
Find all leaves and their predictions:
import re

# Collect every leaf value of every tree in the model, remembering which
# tree each value came from.  The two lists are filled in lockstep:
# list_tree[k] is the tree index of the leaf whose value is list_preds[k].
list_tree = []
list_preds = []

# Dump the model once; get_dump returns one dot string per tree, so there
# is no need to re-dump per tree or to rely on an IndexError (and a fixed
# upper bound on the tree count) to know when to stop.
tree_dumps = loaded_model.get_dump(fmap='', dump_format='dot')

# Captures the text after "leaf=" up to the closing quote / whitespace.
# Assumes leaf labels in the dot dump look like  label="leaf=<number>".
leaf_value_pattern = re.compile(r'leaf=([^"\s]+)')

for i, tree in enumerate(tree_dumps):
    for match in leaf_value_pattern.finditer(tree):
        # A value that is not a number raises ValueError here instead of
        # silently truncating the scan, as the old bare ``except`` did.
        pred = round(float(match.group(1)), 3)
        list_tree.append(i)
        list_preds.append(pred)
# One row per leaf: the tree it belongs to and its (rounded) value.
preds_data = pd.DataFrame({'tree': list_tree, 'pred': list_preds})

# Leaves are bucketed separately by sign; a value of exactly zero is
# grouped with the negatives.
neg_values = preds_data.loc[preds_data['pred'] <= 0.000]
pos_values = preds_data.loc[preds_data['pred'] > 0.000]

# Upper bounds of the five colour buckets within each sign group.
percentiles = [20, 40, 60, 80, 100]

# If a sign group has no leaves there is nothing to take a percentile of;
# fall back to cut-offs that are never the deciding bound for a real leaf.
pos_preds = pos_values['pred'].tolist()
pos_ntiles = (
    [np.percentile(pos_preds, p) for p in percentiles]
    if pos_preds
    else [float('inf')] * len(percentiles)
)
pos_colors = [  # light to dark
    '#ffbaba',
    '#ff7b7b',
    '#ff5252',
    '#ff2d2d',
    '#ff0000',
]

neg_preds = neg_values['pred'].tolist()
neg_ntiles = (
    [np.percentile(neg_preds, p) for p in percentiles]
    if neg_preds
    else [float('-inf')] * len(percentiles)
)
neg_colors = [  # dark to light
    '#acdda3',
    '#bce3b5',
    '#cdeac7',
    '#ddf1da',
    '#eef8ec',
]
Next, create the tree, reusing some of the library's plotting source code with modifications:
from graphviz import Source
import json
# Index of the boosted tree to draw.
iteration = 0

# Graphviz attributes requested for every leaf node.  The fill colour is
# only a placeholder: the recolouring step further down replaces it with
# a per-leaf colour.
leaf_node_params = {
    'shape': 'box',
    'style': 'filled',
    'fillcolor': 'red',
}

# Graph-level attributes to forward to graphviz.
extra = {'size': '100', 'ratio': '.4'}

kwargs = {'leaf_node_params': leaf_node_params}

# Copy every extra option into kwargs['graph_attrs'].  setdefault creates
# the dict on first use AND still stores that first key; the previous
# if/else created the dict but dropped the first option ('size').
for key, value in extra.items():
    kwargs.setdefault('graph_attrs', {})[key] = value

# The dump format is the literal 'dot', optionally followed by ':' and a
# JSON object carrying the styling options.
parameters = 'dot'
if kwargs:
    parameters += ':' + json.dumps(kwargs)

# Dot source of the selected tree.
tree = loaded_model.get_dump(fmap='', dump_format=parameters)[iteration]
import re
# Matches a leaf label together with the fillcolor attribute that follows
# it inside the same node statement (the match may not cross ']' or a
# newline).  Any attributes sitting between the two are captured in
# ``between`` and preserved.  Matching the attribute by pattern removes the
# dependence on the placeholder colour being exactly three characters long.
# Assumes leaf labels look like  label="leaf=<number>"  -- confirm against
# the dot output of the installed library version.
leaf_node_pattern = re.compile(
    r'"leaf=(?P<value>[^"]+)"(?P<between>[^\]\n]*?)fillcolor\s*=\s*"[^"]*"'
)


def _leaf_fillcolor(leaf_value):
    """Return the palette colour for *leaf_value*.

    The negative cut-offs are tried first, then the first four positive
    cut-offs; anything larger gets the last (darkest) positive colour.
    """
    buckets = list(zip(neg_ntiles, neg_colors))
    buckets += list(zip(pos_ntiles[:-1], pos_colors[:-1]))
    for cutoff, color in buckets:
        if leaf_value <= cutoff:
            return color
    return pos_colors[-1]


def _recolor_leaf(match):
    """Rewrite one leaf: drop the 'leaf=' prefix and set its fill colour."""
    leaf_value = round(float(match.group('value')), 3)
    return (
        '"' + str(leaf_value) + '"'
        + match.group('between')
        + 'fillcolor ="' + _leaf_fillcolor(leaf_value) + '"'
    )


# A single substitution pass replaces the manual slicing of the dot text;
# a dump in which nothing matches is passed through unchanged.
tree = leaf_node_pattern.sub(_recolor_leaf, tree)

image = Source(tree)
image.render(mypath, format='png')
I am open to suggestions, and I would love for this to become a standard option. I know that walking through the dot text and replacing pieces of it is probably a big hack. If there is a more elegant, graphviz-based way to do this, please share.