agrupar valores continuos en Pyspark

from pyspark.ml.feature import Bucketizer

df2 = Bucketizer(
    splits=[-float('inf'), 0, 500, 1000, float('inf')],
    inputCol='numbers',
    outputCol='numbers_bin'
).transform(df)

df2.show()
+-------+-----------+
|numbers|numbers_bin|
+-------+-----------+
| 142.56|        1.0|
|   null|       null|
|2023.33|        3.0|
| 477.76|        1.0|
| 175.52|        1.0|
|1737.45|        3.0|
| 520.72|        2.0|
|  641.2|        2.0|
|   79.3|        1.0|
| 138.43|        1.0|
+-------+-----------+
Courageous Caterpillar