Using Python’s data science tools for data extraction

Pandas - a flexible, easy-to-use open-source data analysis and manipulation tool

DuckDB - a high-performance analytical database system

Here’s a quick sample of how to extract data from a drawing, put the data inside a pandas DataFrame, and then run queries against that data:

from pyrx_imp import Rx, Ge, Gi, Db, Ap, Ed
from timeit import default_timer as timer
from collections import defaultdict
import pandas as pd
import traceback
import duckdb

def processBlock(db, blkname, data):
    """Collect attribute text from every reference of a named block.

    Looks ``blkname`` up in the block table of ``db``. For each block
    reference id reported by that block's record, and for each attribute id
    on that reference, the attribute's text string is appended to
    ``data[tag]``.

    Args:
        db: Database whose block table is searched.
        blkname: Name of the block definition to look for.
        data: Mapping of attribute tag -> list of text strings, mutated in
            place. It must create the list on first access to a new tag
            (e.g. ``collections.defaultdict(list)``).

    Returns:
        None. If ``blkname`` is not in the block table, ``data`` is left
        untouched.
    """
    bt = Db.BlockTable(db.blockTableId())
    if blkname not in bt:
        return
    btr = Db.BlockTableRecord(bt[blkname])
    # Walk references and their attributes directly instead of building two
    # intermediate lists; values are appended in the same order as before.
    # NOTE(review): values are appended per attribute with no padding, so if
    # references carry different tag sets the per-tag lists will differ in
    # length -- confirm downstream consumers (e.g. pd.DataFrame) can cope.
    for ref_id in btr.getBlockReferenceIds():
        ref = Db.BlockReference(ref_id)
        for att_id in ref.attributeIds():
            att = Db.AttributeReference(att_id)
            data[att.tag()].append(att.textString())

def processDb(path, blkname, data):
    """Read a drawing file into a side database and harvest block attributes.

    Args:
        path: Path of the drawing file passed to ``readDwgFile``.
        blkname: Block name forwarded to ``processBlock``.
        data: Tag -> list mapping forwarded to ``processBlock``, which
            fills it in place.
    """
    # NOTE(review): the positional flags are presumably
    # buildDefaultDrawing=False, noDocument=True -- confirm against the
    # PyRx Db.Database documentation.
    side_db = Db.Database(False, True)
    side_db.readDwgFile(path)
    side_db.closeInput(True)
    processBlock(side_db, blkname, data)

def PyRxCmd_doit():
    """Extract "elev" block attributes from a drawing and query them.

    Reads the hard-coded drawing via ``processDb``, loads the collected
    tag -> values mapping into a pandas DataFrame, wraps it in a DuckDB
    relation, and prints the elapsed time, the full relation and the rows
    where ``ROOM == '101'``. Any exception is caught and its traceback is
    printed instead of propagating.

    Presumably exposed as a command by PyRx through the ``PyRxCmd_`` name
    prefix -- confirm against the PyRx documentation.
    """
    try:
        started = timer()
        columns = defaultdict(list)
        processDb("E:\\Temp\\06457Submittal.dwg", "elev", columns)

        # Move the dict into pandas: one column per attribute tag.
        frame = pd.DataFrame(columns)

        # Create a DuckDB relation over the frame so that filter
        # expressions can be applied to it.
        relation = duckdb.df(frame)
        room_101 = relation.filter("ROOM == '101'")

        # NOTE(review): DuckDB relations are likely evaluated lazily, in
        # which case this timing excludes the query work triggered by the
        # prints below -- confirm.
        elapsed = timer() - started
        print("Seconds = {}".format(elapsed))
        print(relation)  # print all the data
        print(room_101)  # print the search result

    except Exception as err:
        traceback.print_exception(err)

https://github.com/CEXT-Dan/PyRx