Skip to content

Callback functions for formatting

cluster_id_if_cluster(data, linkage_id)

Returns cluster ID if a node belongs to one cluster, otherwise an empty string.

Parameters:

Name Type Description Default
data ClusteringData required
linkage_id int

linkage ID

required

Returns:

Name Type Description
str str

Cluster ID or empty string.

Source code in idendrogram/callbacks.py
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
def cluster_id_if_cluster(data: ClusteringData, linkage_id: int) -> str:
    """Looks up the cluster ID recorded for a linkage ID, if there is one.

    `data.get_leaders()` is expected to return two parallel arrays: linkage IDs
    and the cluster ID associated with each of them. When `linkage_id` appears
    in the first array, the matching entry of the second one is returned as a string.

    Args:
        data (ClusteringData): [idendrogram.ClusteringData][] object
        linkage_id (int): linkage ID

    Returns:
        str: Cluster ID, or an empty string when `linkage_id` is not among the leaders.
    """
    leader_nodes, leader_clusters = data.get_leaders()

    # Guard clause: nothing to report for nodes that are not listed as leaders.
    if linkage_id not in leader_nodes:
        return ""

    # Boolean-mask the cluster IDs by position of the matching leader; take the first hit.
    matching_clusters = leader_clusters[leader_nodes == linkage_id]
    return str(matching_clusters[0])

cluster_labeller(fmt_string='Cluster {cluster} ({cluster_size} data points)')

Returns a callable designed to be used as a callback to axis_label_func parameter of idendrogram.idendrogram.create_dendrogram. Returns a formatted string for the first encountered node in a cluster, otherwise an empty string.

Parameters:

Name Type Description Default
fmt_string str

Formatting string. Variables available at the time of evaluation are cluster, cluster_size and id (the linkage ID).

'Cluster {cluster} ({cluster_size} data points)'

Returns:

Type Description
Callable[[ClusteringData, int], str]

Callable[[ClusteringData, int], str]: Callable designed to be used as a callback to axis_label_func parameter of idendrogram.idendrogram.create_dendrogram.

Source code in idendrogram/callbacks.py
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
def cluster_labeller(
    fmt_string: str = "Cluster {cluster} ({cluster_size} data points)",
) -> Callable[[ClusteringData, int], str]:
    """Returns a callable designed to be used as a callback to `axis_label_func` parameter of [idendrogram.idendrogram.create_dendrogram][].
    The callable returns a formatted string the first time it is called for a node of a given cluster, otherwise a single-space string.

    Args:
        fmt_string (str, optional): Formatting string. Variables available at the time of evaluation are `cluster`, `cluster_size` and `linkage_id` (the latter is also available as `id`).

    Returns:
        Callable[[ClusteringData, int], str]: Callable designed to be used as a callback to `axis_label_func` parameter of [idendrogram.idendrogram.create_dendrogram][].

    Note:
        The returned callable remembers which clusters it has already labelled for as long as it lives;
        calling it again for a cluster it has already seen yields the blank label.
    """

    # Clusters that already received a label; shared by every call of this labeller.
    seen_clusters = set()

    def labeller(data: ClusteringData, linkage_id: int) -> str:
        _, nodelist = data.get_tree()
        node = nodelist[linkage_id]

        # The node is attributed to the cluster of the first leaf found beneath it.
        leaf_ids = node.pre_order(lambda x: x.id if x.is_leaf() else None)
        cluster = data.cluster_assignments[leaf_ids[0]]

        if cluster in seen_clusters:
            # A single space (not an empty string) is the established blank label; kept as is.
            return " "

        seen_clusters.add(cluster)
        return fmt_string.format(
            cluster=cluster,
            cluster_size=node.get_count(),
            # `linkage_id` is the documented name; `id` is kept for existing format strings.
            linkage_id=linkage_id,
            id=linkage_id,
        )

    return labeller

counts(data, linkage_id)

Returns the number of original observations associated with the linkage ID. Used as the default for axis label callback.

Parameters:

Name Type Description Default
data ClusteringData required
linkage_id int

linkage ID

required

Returns:

Name Type Description
str str

number of original observations (as string)

Source code in idendrogram/callbacks.py
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
def counts(data: ClusteringData, linkage_id: int) -> str:
    """Reports how many original observations sit under a linkage ID. Used as the default for axis label callback.

    The node is looked up at position `linkage_id` in the node list returned by
    `data.get_tree()`, and its `get_count()` result is converted to text.

    Args:
        data (ClusteringData): [idendrogram.ClusteringData][] object
        linkage_id (int): linkage ID

    Returns:
        str: number of original observations (as string)
    """
    tree = data.get_tree()
    # get_tree() yields a pair; only the second element (the node list) is needed here.
    _root, nodes = tree
    node = nodes[linkage_id]
    return str(node.get_count())

default_hover(data, linkage_id)

For a given linkage ID, returns a dictionary with two keys: linkage id and # of items. Used as the default for tooltips.

Parameters:

Name Type Description Default
data ClusteringData required
linkage_id int

linkage ID

required

Returns:

Name Type Description
Dict Dict

Dictionary with attributes

Source code in idendrogram/callbacks.py
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
def default_hover(data: ClusteringData, linkage_id: int) -> Dict:
    """Builds the default tooltip content for a linkage ID.

    The result has two entries, in this order: `"# of items"` (the string produced by
    `counts` for this linkage ID) and `"linkage id"` (the ID itself, unchanged).

    Args:
        data (ClusteringData): [idendrogram.ClusteringData][] object
        linkage_id (int): linkage ID

    Returns:
        Dict: Dictionary with attributes
    """
    item_count = counts(data=data, linkage_id=linkage_id)

    # Insertion order is preserved, so the item count stays ahead of the ID.
    hover_info = {}
    hover_info["# of items"] = item_count
    hover_info["linkage id"] = linkage_id
    return hover_info

Creates a callable compatible with link_color_func argument of idendrogram.idendrogram that will color nodes based on the cluster they belong to, with a separate color for nodes containing multiple clusters.

Parameters:

Name Type Description Default
colors Dict

Dictionary mapping cluster IDs to colors. If empty, defaults to nine colors of the Matplotlib 10-color scheme (the tenth, '#1f77b4', is the default for above_threshold).

dict()
above_threshold str

Color to be used for nodes containing multiple clusters.

'#1f77b4'

Returns:

Type Description
Callable[[ClusteringData, int], str]

Callable[[ClusteringData, int], str]: Callable to be used as link_color_func argument of idendrogram.idendrogram.

Example
    #your clustering workflow
    Z = scipy.cluster.hierarchy.linkage(...)
    cluster_assignments =  scipy.cluster.hierarchy.fcluster(Z, threshold=threshold, ...) 

    # let's assume clustering resulted in 3 clusters and we want to have them as red/blue/green
    # cluster_assignments.unique == 3

    # define a custom coloring function
    painter = idendrogram.callbacks.link_painter(
        colors={
            1: 'red',
            2: 'blue',
            3: 'green',
        }, 
        above_threshold='black'
    )

    #create the dendrogram
    dd = idendrogram.idendrogram()            
    dd.set_cluster_info(
        idendrogram.ClusteringData(
            linkage_matrix=Z, 
            cluster_assignments=cluster_assignments, 
            threshold=threshold 
        )
    )
    dd.create_dendrogram(link_color_func = painter).to_plotly()
Source code in idendrogram/callbacks.py
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
def link_painter(
    colors: Dict[int, str] = dict(),
    above_threshold: str = "#1f77b4",
) -> Callable[[ClusteringData, int], str]:
    """Builds a callable compatible with `link_color_func` argument of [idendrogram.idendrogram][]
        that colors each node by the cluster it belongs to and uses one dedicated color for nodes without a single cluster.

    Args:
        colors (Dict, optional): Dictionary mapping cluster IDs to colors. When empty, a built-in palette of nine
            colors keyed 1-9 is used (Matplotlib's 10-color scheme minus `#1f77b4`, which is the default for `above_threshold`).
            A cluster ID that is not a key of the mapping gets the color found at position
            `cluster_id % len(colors)` among the mapping's values.
        above_threshold (str, optional): Color returned when `data.get_cluster_id` reports `None` for a node,
            i.e. for nodes containing multiple clusters.

    Returns:
        Callable[[ClusteringData, int], str]: Callable to be used as `link_color_func` argument of [idendrogram.idendrogram][].

    Example:
        ```
            #your clustering workflow
            Z = scipy.cluster.hierarchy.linkage(...)
            cluster_assignments =  scipy.cluster.hierarchy.fcluster(Z, threshold=threshold, ...) 

            # let's assume clustering resulted in 3 clusters and we want to have them as red/blue/green
            # cluster_assignments.unique == 3

            # define a custom coloring function
            painter = idendrogram.callbacks.link_painter(
                colors={
                    1: 'red',
                    2: 'blue',
                    3: 'green',
                }, 
                above_threshold='black'
            )

            #create the dendrogram
            dd = idendrogram.idendrogram()            
            dd.set_cluster_info(
                idendrogram.ClusteringData(
                    linkage_matrix=Z, 
                    cluster_assignments=cluster_assignments, 
                    threshold=threshold 
                )
            )
            dd.create_dendrogram(link_color_func = painter).to_plotly()
        ```
    """
    if len(colors) > 0:
        # Keep a reference to the caller's mapping (no copy is taken).
        palette = colors
    else:
        palette = {
            1: "#ff7f0e",
            2: "#2ca02c",
            3: "#d62728",
            4: "#9467bd",
            5: "#8c564b",
            6: "#e377c2",
            7: "#7f7f7f",
            8: "#bcbd22",
            9: "#17becf",
        }

    def link_colors(data: ClusteringData, linkage_id: int) -> str:
        cluster_id = data.get_cluster_id(linkage_id=linkage_id)

        # No single cluster for this node: use the dedicated color.
        if cluster_id is None:
            return above_threshold

        # Explicitly mapped cluster IDs win.
        if cluster_id in palette:
            return palette[cluster_id]

        # Otherwise wrap around the palette by position (value order of the mapping).
        position = cluster_id % len(palette)
        ordered_colors = list(palette.values())
        return ordered_colors[position]

    return link_colors