Skip to content

Commit

Permalink
Changes to make multiple language support easier.
Browse files Browse the repository at this point in the history
  • Loading branch information
jcranmer committed Jul 27, 2011
1 parent 3376d1e commit 8065f64
Show file tree
Hide file tree
Showing 5 changed files with 129 additions and 79 deletions.
39 changes: 26 additions & 13 deletions dxr-index.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,16 @@
from multiprocessing import cpu_count
from multiprocessing.pool import ThreadPool as Pool
from itertools import chain
import os
import sys
import getopt
import subprocess
import dxr
import dxr.htmlbuilders
import dxr.languages
import getopt
import os
import shutil
import dxr
import sqlite3
import string
import subprocess
import sys
import time

# At this point in time, we've already compiled the entire build, so it is time
Expand Down Expand Up @@ -174,23 +175,35 @@ def builddb(treecfg, dbdir):
print "Storing data..."
dxr.store_big_blob(treecfg, big_blob)

# Build the SQL for later queries. This combines the main language schema
# with plugin-specific information. The pragmas that are executed should
# make the SQL stage go faster.
print "Building SQL..."
all_statements = []
schemata = []
for plugin in dxr.get_active_plugins(treecfg):
schemata.append(plugin.get_schema())
if plugin.__name__ in big_blob:
all_statements.extend(plugin.sqlify(big_blob[plugin.__name__]))

dbname = treecfg.tree + '.sqlite'
conn = sqlite3.connect(os.path.join(dbdir, dbname))
conn.execute('PRAGMA synchronous=off')
conn.execute('PRAGMA page_size=65536')
# Safeguard against non-ASCII text. Let's just hope everyone uses UTF-8
conn.text_factory = str

# Import the schemata
schemata = [dxr.languages.get_standard_schema()]
for plugin in dxr.get_active_plugins(treecfg):
schemata.append(plugin.get_schema())
conn.executescript('\n'.join(schemata))
conn.commit()
for stmt in all_statements:

# Load and run the SQL
def sql_generator():
for plugin in dxr.get_active_plugins(treecfg):
if plugin.__name__ in big_blob:
plugblob = big_blob[plugin.__name__]
for statement in plugin.sqlify(plugblob):
yield statement
for statement in dxr.languages.get_sql_statements("native", plugblob):
yield statement

for stmt in sql_generator():
if isinstance(stmt, tuple):
conn.execute(stmt[0], stmt[1])
else:
Expand Down
77 changes: 77 additions & 0 deletions dxr/languages.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
import dxr.plugins

# The following schema is the common global schema, so no matter which plugins
# are used, this schema will always be present. Most tables have a language
# column which indicates the source language that the entity is written in.
#
# Each column is a (name, type, flag) tuple. The special types "_location" and
# "_language" are expanded by dxr.plugins to VARCHAR(256) and VARCHAR(32)
# respectively when the CREATE TABLE statement is built; for tables with a
# "_language" column, the 'language' value of each row is filled in when the
# insert statements are generated.
# NOTE(review): the third element looks like a "nullable" flag (False
# presumably produces NOT NULL), and the "_key" tuple presumably names the
# primary-key column(s) -- confirm against dxr.plugins.
language_schema = dxr.plugins.Schema({
    # Scope definitions: a scope is anything that is both interesting (i.e., not
    # a namespace) and can contain other objects. The IDs for this scope should be
    # IDs in other tables as well; the table it is in can disambiguate which type
    # of scope you're looking at.
    "scopes": [
        ("scopeid", "INTEGER", False), # An ID for this scope
        ("sname", "VARCHAR(256)", True), # Name of the scope
        ("sloc", "_location", True), # Location of the canonical decl
        ("language", "_language", False), # The language of the scope
        ("_key", "scopeid")
    ],
    # Type definitions: anything that defines a type per the relevant specs.
    "types": [
        ("tid", "INTEGER", False), # Unique ID for the type
        ("scopeid", "INTEGER", False), # Scope this type is defined in
        ("tname", "VARCHAR(256)", False), # Simple name of the type
        ("tqualname", "VARCHAR(256)", False), # Fully-qualified name of the type
        ("tloc", "_location", False), # Location of canonical decl
        ("tkind", "VARCHAR(32)", True), # Kind of type (e.g., class, union)
        ("language", "_language", False), # Language of the type
        ("_key", "tid")
    ],
    # Inheritance relations: note that we store the full transitive closure in
    # this table, so if A extends B and B extends C, we'd have (A, C) stored in
    # the table as well; this is necessary to make SQL queries work, since there's
    # no "transitive closure lookup expression".
    "impl": [
        ("tbase", "INTEGER", False), # tid of base type
        ("tderived", "INTEGER", False), # tid of derived type
        ("inhtype", "VARCHAR(32)", True), # Type of inheritance; NULL is indirect
        ("_key", "tbase", "tderived")
    ],
    # Functions: functions, methods, constructors, operator overloads, etc.
    "functions": [
        ("funcid", "INTEGER", False), # Function ID (also in scopes)
        ("scopeid", "INTEGER", False), # Scope defined in
        ("fname", "VARCHAR(256)", False), # Short name (no args)
        ("fqualname", "VARCHAR(512)", False), # Fully qualified name, excluding args
        ("fargs", "VARCHAR(256)", False), # Argument string, including parens
        ("ftype", "VARCHAR(256)", False), # Full return type, as a string
        ("floc", "_location", True), # Location of definition
        ("modifiers", "VARCHAR(256)", True), # Modifiers (e.g., private)
        ("language", "_language", False), # Language of the function
        ("_key", "funcid")
    ],
    # Variables: class, global, local, enum constants; they're all in here.
    # Variables are of course not scopes, but for ease of use, they use IDs from
    # the same namespace: no scope will have the same ID as a variable and vice
    # versa.
    "variables": [
        ("varid", "INTEGER", False), # Variable ID
        ("scopeid", "INTEGER", False), # Scope defined in
        ("vname", "VARCHAR(256)", False), # Short name
        ("vloc", "_location", True), # Location of definition
        ("vtype", "VARCHAR(256)", True), # Full type (including pointer stuff)
        ("modifiers", "VARCHAR(256)", True), # Modifiers for the declaration
        ("language", "_language", False), # Language of the variable
        ("_key", "varid")
    ],
    # NOTE(review): undocumented in the original; presumably links an ID to the
    # ID of the corresponding entity in another language -- confirm. Unlike the
    # other tables, no "_key" entry is declared here.
    "crosslang": [
        ("canonid", "INTEGER", False),
        ("otherid", "INTEGER", False),
        ("otherlanguage", "VARCHAR(32)", False),
    ],
})


def get_standard_schema():
    """ Returns the SQL text that creates the tables of the common language
    schema (language_schema). """
    create_sql = language_schema.get_create_sql()
    return create_sql

def get_sql_statements(lang_name, plugin_blob):
    """ Returns an iterator over the statements that insert the data in
    plugin_blob into the tables of the common language schema, with lang_name
    passed along as the language of the data. """
    statements = language_schema.get_data_sql(plugin_blob, lang_name)
    return statements
21 changes: 16 additions & 5 deletions dxr/plugins.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,11 +98,11 @@ def get_create_sql(self):
""" Returns the SQL that creates the tables in this schema. """
return '\n'.join([tbl.get_create_sql() for tbl in self.tables.itervalues()])

def get_data_sql(self, blob):
def get_data_sql(self, blob, language=''):
""" Returns the SQL that inserts data into tables given a blob. """
for tbl in self.tables:
if tbl in blob:
sqliter = self.tables[tbl].get_data_sql(blob[tbl])
sqliter = self.tables[tbl].get_data_sql(blob[tbl], language)
for sql in sqliter:
yield sql

Expand Down Expand Up @@ -132,6 +132,7 @@ def __init__(self, tblname, tblschema):
self.name = tblname
self.key = None
self.columns = []
self.needLang = False
defaults = ['VARCHAR(256)', True]
for col in tblschema:
if isinstance(tblschema, tuple) or isinstance(tblschema, list):
Expand All @@ -154,12 +155,15 @@ def get_create_sql(self):
sql += 'CREATE TABLE %s (\n ' % (self.name)
colstrs = []
special_types = {
'_location': 'VARCHAR(256)'
'_location': 'VARCHAR(256)',
'_language': 'VARCHAR(32)'
}
for col, spec in self.columns:
specsql = col + ' '
if spec[0][0] == '_':
specsql += special_types[spec[0]]
if spec[0] == '_language':
self.needLang = True
else:
specsql += spec[0]
if len(spec) > 1 and spec[1] == False:
Expand All @@ -171,17 +175,17 @@ def get_create_sql(self):
sql += '\n);\n'
return sql

def get_data_sql(self, blobtbl):
def get_data_sql(self, blobtbl, language):
    """ Yields one parameterized INSERT statement per row of blobtbl.

    blobtbl is either a dict, whose values are the rows, or any other
    iterable of rows; each row is a dict mapping column names to values.
    Row keys that are not columns of this table are ignored. If the table
    has a column of the special '_language' type, each row's 'language'
    entry is set (in place) to language before the statement is built.

    Each yielded item is a (sql, args) tuple, where args holds the values
    for the '?' placeholders in sql. """
    if isinstance(blobtbl, dict):
        rows = blobtbl.itervalues()
    else:
        rows = blobtbl
    colset = set(col[0] for col in self.columns)
    # needLang is only set as a side effect of get_create_sql(); also look at
    # the column specs so the language is not silently left out when the
    # create SQL was never generated on this object.
    tag_language = self.needLang or any(
        spec[0] == '_language' for _, spec in self.columns)
    for row in rows:
        if tag_language:
            row['language'] = language
        # Only add the keys in the columns
        keys = colset.intersection(row)
        args = tuple(row[k] for k in keys)
        yield ('INSERT OR IGNORE INTO %s (%s) VALUES (%s);' % (self.name,
            ','.join(keys), ','.join('?' for k in keys)), args)


def make_get_schema_func(schema):
""" Returns a function that satisfies get_schema's contract from the given
schema object. """
Expand All @@ -193,3 +197,10 @@ def get_schema():
def required_exports():
""" Returns the required exports for a module, for use as __all__. """
return ['post_process', 'sqlify', 'can_use', 'get_htmlifiers', 'get_schema']

last_id = 0


def next_global_id():
    """ Advances the module-wide ID counter (last_id) by one and returns the
    new value, so every call hands out an ID not returned before. """
    global last_id
    last_id = last_id + 1
    return last_id
61 changes: 6 additions & 55 deletions xref-tools/cxx-clang/indexer.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,42 +79,36 @@ def recanon_decl(name, loc):

# Produce all scopes
scopes = {}
nextIndex = 1
typeKeys = set()
for t in types:
key = canonicalize_decl(t[0], t[1])
if key not in types:
key = recanon_decl(t[0], t[1])
if key not in scopes:
typeKeys.add(key)
types[key]['tid'] = scopes[key] = nextIndex
nextIndex += 1
types[key]['tid'] = scopes[key] = dxr.plugins.next_global_id()
# Typedefs need a tid, but they are not a scope
for t in typedefs:
typedefs[t]['tid'] = nextIndex
nextIndex += 1
typedefs[t]['tid'] = dxr.plugins.next_global_id()
funcKeys = set()
for f in functions:
key = canonicalize_decl(f[0], f[1])
if key not in functions:
key = recanon_decl(f[0], f[1])
if key not in scopes:
funcKeys.add(key)
functions[key]['funcid'] = scopes[key] = nextIndex
nextIndex += 1
functions[key]['funcid'] = scopes[key] = dxr.plugins.next_global_id()

# Variables aren't scoped, but we still need to refer to them in the same
# manner, so we'll unify variables with the scope ids
varKeys = {}
for v in variables:
key = (v[0], v[1])
if key not in varKeys:
varKeys[key] = variables[v]['varid'] = nextIndex
nextIndex += 1
varKeys[key] = variables[v]['varid'] = dxr.plugins.next_global_id()

for m in macros:
macros[m]['macroid'] = nextIndex
nextIndex += 1
macros[m]['macroid'] = dxr.plugins.next_global_id()

# Scopes are now defined, this allows us to modify structures for sql prep

Expand Down Expand Up @@ -282,16 +276,6 @@ def can_use(treecfg):
return dxr.plugins.in_path('clang') and dxr.plugins.in_path('llvm-config')

schema = dxr.plugins.Schema({
# Scope definitions: a scope is anything that is both interesting (i.e., not
# a namespace) and can contain other objects. The IDs for this scope should be
# IDs in other tables as well; the table its in can disambiguate which type of
# scope you're looking at.
"scopes": [
("scopeid", "INTEGER", False), # An ID for this scope
("sname", "VARCHAR(256)", True), # Name of the scope
("sloc", "_location", True), # Location of the canonical decl
("_key", "scopeid")
],
# Type definitions: anything that defines a type per the relevant specs.
"types": [
("tid", "INTEGER", False), # Unique ID for the type
Expand All @@ -301,42 +285,9 @@ def can_use(treecfg):
("tloc", "_location", False), # Location of canonical decl
("tkind", "VARCHAR(32)", True), # Kind of type (e.g., class, union)
("ttypedef", "VARCHAR(256)", True), # Type (if this is a typedef)
("language", "_language", True), # Language of the type
("_key", "tid")
],
# Inheritance relations: note that we store the full transitive closure in
# this table, so if A extends B and B extends C, we'd have (A, C) stored in
# the table as well; this is necessary to make SQL queries work, since there's
# no "transitive closure lookup expression".
"impl": [
("tbase", "INTEGER", False), # tid of base type
("tderived", "INTEGER", False), # tid of derived type
("inhtype", "VARCHAR(32)", True), # Type of inheritance; NULL is indirect
("_key", "tbase", "tderived")
],
# Functions: functions, methods, constructors, operator overloads, etc.
"functions": [
("funcid", "INTEGER", False), # Function ID (also in scopes)
("scopeid", "INTEGER", False), # Scope defined in
("fname", "VARCHAR(256)", False), # Short name (no args)
("fqualname", "VARCHAR(512)", False), # Fully qualified name, excluding args
("fargs", "VARCHAR(256)", False), # Argument vector
("ftype", "VARCHAR(256)", False), # Full return type, as a string
("floc", "_location", True), # Location of definition
("modifiers", "VARCHAR(256)", True), # Modifiers (e.g., private)
("_key", "funcid")
],
# Variables: class, global, local, enum constants; they're all in here
# Variables are of course not scopes, but for ease of use, they use IDs from
# the same namespace, no scope will have the same ID as a variable and v.v.
"variables": [
("varid", "INTEGER", False), # Variable ID
("scopeid", "INTEGER", False), # Scope defined in
("vname", "VARCHAR(256)", False), # Short name
("vloc", "_location", True), # Location of definition
("vtype", "VARCHAR(256)", True), # Full type (including pointer stuff)
("modifiers", "VARCHAR(256)", True), # Modifiers for the declaration
("_key", "varid")
],
# References to functions, types, variables, etc.
"refs": [
("refid", "INTEGER", False), # ID of the identifier being referenced
Expand Down
10 changes: 4 additions & 6 deletions xref-tools/moztools/indexer.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,11 +49,9 @@ def collect_files(arg, dirname, fnames):

blob = {}
blob["interfaces"] = {}
nextNum = 2 ** 16
for iface in interfaces:
blob["interfaces"][iface] = interfaces[iface]
interfaces[iface]["iid"] = nextNum
nextNum += 1
interfaces[iface]["iid"] = dxr.plugins.next_global_id()
tblmap = {
"attributes": "attrid",
"methods": "funcid",
Expand All @@ -63,10 +61,10 @@ def collect_files(arg, dirname, fnames):
blob[table] = {}
things = globals()[table]
for thing, tinfo in things.iteritems():
blob[table][nextNum] = tinfo
tinfo[tblmap[table]] = nextNum
id = dxr.plugins.next_global_id()
blob[table][id] = tinfo
tinfo[tblmap[table]] = id
tinfo["iid"] = interfaces[tinfo["iface"]]["iid"]
nextNum += 1

# File pivoting. Joy.
def schema():
Expand Down

0 comments on commit 8065f64

Please sign in to comment.