Pandas AI
Introduction
Pandas is the most popular open-source Python library used for data manipulation and analysis.
If you are familiar with Excel or SQL, you can think of Pandas as a "programmable Excel" that is much faster, more powerful, and capable of handling millions of rows of data.
By importing the excel or SQL to table format , it is powerfully for analysis the data , such as calculating sum, plotting graph, cleaning data, grouping data, ... .
By Integrating with AI, the prompt with natural language can convert into pandas function to obtain the calculated result, suitable for creating a chatbot of data dashboard
Example
def chat_with_sql():
llm = LiteLLM(
model="gpt-4.1",
api_key="sk-wx6xklvYEtIrd-CQuQcqNA",
base_url="https://ai.hld.com/lite-llm/"
)
pai.config.set({
"llm": llm
})
# Connect to MSSQL
conn_str = (
"DRIVER={ODBC Driver 17 for SQL Server};"
"SERVER=CLUSQL01;"
"DATABASE=HLDCCSCPRDB_report;"
"UID=HLDCCSCPRDBViewer;"
"PWD=EFbG5BVVNx!hgxox6#5EMkzd4ks*EN;"
)
conn = pyodbc.connect(conn_str)
query = """
SELECT
PtyCode AS ptycode,
ChnName,
PtyCode + ' - ' + ChnName as PtycodeCn,
SUM(TotalUnit) AS TotalUnit,
SUM(AvailableUnits) AS AvailableUnits,
SUM(AvailableLongTermUnits) AS AvailableLongTermUnits,
SUM(RentedUnit) AS RentedUnit,
CASE
WHEN SUM(AvailableLongTermUnits) = 0 THEN 0
ELSE SUM(RentedUnit) * 1.0 / SUM(AvailableLongTermUnits)
END AS RentRatio,
TakenDayTime
FROM
vwDTOverallRentalDetail
WHERE
CarType = 'ALL'
GROUP BY
PtyCode,
ChnName,
TakenDayTime,
PtyCode + ' - ' + ChnName
"""
# Load data into pandas DataFrame
rental_df = pd.read_sql(
query,
conn,
)
print(rental_df)
conn.close()
# Define column descriptions for better AI understanding
columns_dict = {
"ptycode": {"type": "string", "description": "Property code identifier (unique per property)"},
"ChnName": {"type": "string", "description": "Chinese name of the property"},
"PtycodeCn": {"type": "string", "description": "Combined property code and Chinese name"},
"TotalUnit": {"type": "integer", "description": "Total number of units for this property"},
"AvailableUnits": {"type": "integer", "description": "Number of available units for this property"},
"AvailableLongTermUnits": {"type": "integer", "description": "Number of available long-term rental units. Used as denominator when calculating rent ratio."},
"RentedUnit": {"type": "integer", "description": "Number of rented units. Used as numerator when calculating rent ratio."},
"RentRatio": {"type": "float", "description": "Per-record rental ratio for this single property = RentedUnit / AvailableLongTermUnits. WARNING: Do NOT use SUM(RentRatio) for aggregate calculations. To calculate total/overall/aggregate rent ratio across multiple records, always use: SUM(RentedUnit) / SUM(AvailableLongTermUnits)"},
"TakenDayTime": {"type": "datetime", "description": "The date when the data was recorded. Format is YYYY-MM-DD. Use this column for date filtering. Each property has one record per date."}
}
columns_list = [{"name": name, **props} for name, props in columns_dict.items()]
# Wrap pandas DataFrame with pai.DataFrame for PandasAI compatibility
pai_df = pai.DataFrame(rental_df)
# Load existing dataset or create new one
dataset_path = "company/rental-detail"
try:
df = pai.load(dataset_path)
except Exception:
df = pai.create(
path=dataset_path,
df=pai_df,
description="""Rental detail data from vwDTOverallRentalDetail view showing property rental statistics.
Each row represents one property on a specific date (TakenDayTime).
Contains: total units, available units, rented units and rental ratio per property per date.
""",
columns=columns_list
)
# Chat with your data
# Data Set 1: Overall
response1 = df.chat("What is the total rent ratio in November 2025?")
print(response1)
response2 = df.chat("What is the total number of rented units in November 2025?")
print(response2)
response3 = df.chat("What is the total number of available long-term rental units in November 2025?")
print(response3)
response4 = df.chat("What is the total number of total units in November 2025?")
print(response4)
response5 = df.chat("Comparing October 2025 and November 2025, what is the change in the total rent ratio?")
print(response5)
response6 = df.chat("Comparing October 2025 and November 2025, what is the change in the total number of rented units?")
print(response6)
response7 = df.chat("Comparing October 2025 and November 2025, what is the change in the total number of available long-term rental units?")
print(response7)
response8 = df.chat("Comparing October 2025 and November 2025, what is the change in the total number of total units?")
print(response8)
# Data Set 2: Property Level
response9 = df.chat("What is the rent ratio for 友邦廣場 in November 2025?")
response10 = df.chat("What is the top 20 highest rent ratio properties in November 2025?")
print(response9)
print(response10)Result
Reference
Last updated
Was this helpful?