Python Essentials for AI

The Python you need to know for ML/AI work

~45 min

Python is the dominant language in AI/ML. Here's what you need to be comfortable with.

Data Structures That Matter

In ML, you'll constantly work with these:

```python
# Lists — ordered, mutable sequences
features = [1.5, 2.3, 0.7, 4.1]
labels = [0, 1, 1, 0]

# List comprehensions — you'll use these constantly
squared = [x ** 2 for x in features]
normalized = [x / max(features) for x in features]

# Dictionaries — key-value pairs (model configs, hyperparameters)
config = {
    "learning_rate": 0.001,
    "batch_size": 32,
    "epochs": 100,
    "optimizer": "adam",
}

# Unpacking
lr, bs = config["learning_rate"], config["batch_size"]

# Zip — pairing data together
for feature, label in zip(features, labels):
    print(f"Input: {feature}, Target: {label}")
```

Functions and Classes

```python
# Functions with type hints (common in ML codebases)
def normalize(data: list[float]) -> list[float]:
    """Min-max normalization to [0, 1] range."""
    min_val, max_val = min(data), max(data)
    return [(x - min_val) / (max_val - min_val) for x in data]

# Classes — you'll subclass a lot in PyTorch/TF
class SimplePreprocessor:
    def __init__(self, strategy: str = "normalize"):
        self.strategy = strategy
        self.params = {}

    def fit(self, data: list[float]) -> "SimplePreprocessor":
        """Learn parameters from data."""
        self.params["min"] = min(data)
        self.params["max"] = max(data)
        self.params["mean"] = sum(data) / len(data)
        return self

    def transform(self, data: list[float]) -> list[float]:
        """Apply learned transformation."""
        if self.strategy == "normalize":
            r = self.params["max"] - self.params["min"]
            return [(x - self.params["min"]) / r for x in data]
        return data

# Usage pattern (same as scikit-learn!)
prep = SimplePreprocessor("normalize")
prep.fit(features)
result = prep.transform(features)
```

The fit/transform Pattern

Almost every ML library uses this pattern: fit() learns from data, transform() applies what was learned. You'll see it in scikit-learn, TensorFlow preprocessing, and more.
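The point the pattern makes easy to get right: parameters are learned from the training data only, then reused unchanged on new data. A minimal sketch (plain functions and a params dict instead of a class, purely for illustration):

```python
def fit(data):
    """Learn min/max from the training data only."""
    return {"min": min(data), "max": max(data)}

def transform(data, params):
    """Apply the learned min/max to any dataset."""
    r = params["max"] - params["min"]
    return [(x - params["min"]) / r for x in data]

train = [1.0, 2.0, 3.0, 4.0]
test = [2.5, 5.0]  # new data may fall outside [0, 1] — that's expected

params = fit(train)          # learn from train...
scaled_test = transform(test, params)  # ...apply to test
print(scaled_test)
```

Calling `fit` on the test set instead would leak information from test into the preprocessing step, which is one of the most common ML bugs this pattern exists to prevent.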

Generators and Iterators

Critical for handling large datasets that don't fit in memory:

```python
# Generator — yields data one batch at a time
def data_generator(data, batch_size=32):
    """Yield batches of data — essential for large datasets."""
    for i in range(0, len(data), batch_size):
        yield data[i:i + batch_size]

# Usage
dataset = list(range(1000))
for batch in data_generator(dataset, batch_size=64):
    print(f"Processing batch of size {len(batch)}")
```
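The same laziness is available inline via generator expressions: swapping a list comprehension's square brackets for parentheses computes values one at a time, so aggregates over huge sequences never materialize a full list. A quick sketch:

```python
import itertools

# List comprehension: builds the entire million-element list in memory first
total_eager = sum([x ** 2 for x in range(1_000_000)])

# Generator expression: same result, one value at a time, constant memory
total_lazy = sum(x ** 2 for x in range(1_000_000))

assert total_eager == total_lazy

# Generators compose — even over infinite sequences
evens = (x for x in itertools.count() if x % 2 == 0)
first_five = list(itertools.islice(evens, 5))
print(first_five)  # [0, 2, 4, 6, 8]
```

The tradeoff: a generator can only be consumed once, so if you need multiple passes over the data, either recreate it or fall back to a list.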