To_graphviz modification to color nodes based on prediction

I am here to suggest a feature and to ask whether there is a more elegant way to do what I have done. I wanted to remove the text "leaf=" from the leaf labels and color-code the leaf nodes based on percentiles of the predictions. Below is my code:

First finding the distribution of node values:

Finding all leaves and their predictions:

import re

# Collect the rounded prediction of every leaf in every tree of the model.
# list_tree[k] is the index of the tree that produced list_preds[k].
list_tree = []
list_preds = []

# In the DOT dump a leaf value appears as the quoted text "leaf=<value>".
leaf_value_pattern = re.compile(r'"leaf=([^"]+)"')

# Dump the model once and walk the resulting list of per-tree DOT strings.
# This avoids re-dumping the whole model for every tree index, removes the
# arbitrary 999-tree cap, and no longer needs a bare `except` (which also hid
# genuine parsing errors) to detect the end of the list.
for i, tree in enumerate(loaded_model.get_dump(fmap='', dump_format='dot')):
    for match in leaf_value_pattern.finditer(tree):
        list_tree.append(i)
        list_preds.append(round(float(match.group(1)), 3))

# One row per leaf: the index of its tree and its rounded prediction.
# Building the frame from a dict names the columns directly and also works
# when no leaves were collected.
preds_data = pd.DataFrame({'tree': list_tree, 'pred': list_preds})

# Split the leaves by sign; zero is grouped with the negative values.
neg_values = preds_data.loc[preds_data['pred'] <= 0.000]
pos_values = preds_data.loc[preds_data['pred'] > 0.000]

# Percentile cut points used to bucket leaf values; each group of leaves
# (positive / non-positive) gets one colour per cut point.
percentiles = [20, 40, 60, 80, 100]

# Upper bound of each bucket among the positive leaf values.
pos_ntiles = [
    np.percentile(pos_values['pred'].to_list(), p) for p in percentiles
]

pos_colors = [  # light to dark
    '#ffbaba',
    '#ff7b7b',
    '#ff5252',
    '#ff2d2d',
    '#ff0000',
]

# Upper bound of each bucket among the non-positive leaf values.
neg_ntiles = [
    np.percentile(neg_values['pred'].to_list(), p) for p in percentiles
]

neg_colors = [  # dark to light
    '#acdda3',
    '#bce3b5',
    '#cdeac7',
    '#ddf1da',
    '#eef8ec',
]

Next, I create the tree, reusing some of the `to_graphviz` source code with modifications:

from graphviz import Source
import json

# Index of the tree to draw.
iteration = 0

# Attributes applied to every leaf node. The 'red' fill is only a placeholder
# that is replaced per leaf once the DOT text has been generated.
leaf_node_params = {
    'shape': 'box',
    'style': 'filled',
    'fillcolor': 'red',
}

# Graph-level attributes.
extra = {'size': '100', 'ratio': '.4'}
kwargs = {'leaf_node_params': leaf_node_params}

# Copy every extra into kwargs['graph_attrs'], creating the dict on first use.
# The previous loop created the dict in its `else` branch without storing the
# current key, so the first extra ('size') was silently dropped.
graph_attrs = kwargs.setdefault('graph_attrs', {})
for key, value in extra.items():
    graph_attrs[key] = value

# The dump format is 'dot' followed by ':' and the JSON-encoded options.
parameters = 'dot'
if kwargs:
    parameters += ':'
    parameters += json.dumps(kwargs)

tree = loaded_model.get_dump(
    fmap='',
    dump_format=parameters,
)[iteration]

import re
# Rewrite every leaf of the DOT text: drop the "leaf=" prefix from the label,
# round the value to 3 decimals, and replace the placeholder fill colour with
# the colour of the percentile bucket the value falls into.
#
# A single anchored substitution replaces the earlier manual slicing, which
# depended on a fixed 15-character offset (valid only for fillcolor="red"),
# raised IndexError when the tree contained no matching leaf, and could be
# thrown off by any other occurrence of the substring 'leaf' in the text.

# Bucket upper bounds in ascending order: every negative cut point, then all
# positive cut points except the last; values above those get the last
# positive colour. Assumes five cut points and five colours per sign.
thresholds = [*neg_ntiles, *pos_ntiles[:-1]]
palette = [*neg_colors, *pos_colors]


def _leaf_fillcolor(leaf_value):
    """Return the colour of the first bucket whose upper bound holds leaf_value."""
    for upper_bound, color in zip(thresholds, palette):
        if leaf_value <= upper_bound:
            return color
    return pos_colors[-1]


def _recolor_leaf(match):
    """Build the replacement text for one matched leaf label and fill colour."""
    leaf_value = round(float(match.group(1)), 3)
    return f'"{leaf_value}" fillcolor ="{_leaf_fillcolor(leaf_value)}"'


# Matches the quoted leaf label followed directly by its fillcolor attribute.
tree = re.sub(r'"leaf=([^"]+)"\s+fillcolor="[^"]*"', _recolor_leaf, tree)

# Wrap the modified DOT text in a graphviz Source object and render it to a
# PNG. `mypath` is the output path and must be defined before this point.
image = Source(tree)

image.render(mypath, format='png')

I am open to suggestions/would love this to be standard as an option. I know it’s probably a big hack the way I walked through the dot text and replaced things. If there is a more elegant graphviz based way to do this, please share.