ADDED data/csv/db_experiment/v0.2/db_docs.md Index: data/csv/db_experiment/v0.2/db_docs.md ================================================================== --- /dev/null +++ data/csv/db_experiment/v0.2/db_docs.md @@ -0,0 +1,93 @@ +**Toponymic Database Documentation** + +## Overview +This database is designed to systematically store, analyze and eventually visualize historical and contemporary toponyms (place names) with a structured approach, focusing on spatial, linguistic, and temporal aspects. The structure consists of three interlinked tables: +1. **Spatial Table** - Stores geographical locations and metadata. +2. **Linguistic Table** - Captures toponyms, their various variants and linguistic attributes like pronunciation and etymology. +3. **Temporal Table** - Documents historical changes in toponyms over time. + +--- + +## **1. Spatial Table** + +### **Purpose:** +The spatial table contains unique geographic entities, their locations, and relevant metadata. + +### **Columns:** +- **`ID`** *(int, PK)*: Unique identifier for each geographic object. In the very beginning the order followed an original alphabetically sorted table, 2023_jun_14_dubanov_toponymy, based on a book (Дубанов И.С. «Топонимический словарь Чувашии. Географические названия и термины») order, now it is basically free. +- **`LAT`, `LON`** *(float)*: Estimated geographic coordinates derived from historical and modern maps (see http://www.etomesto.ru/ , https://retromap.ru/ and various other resources). +- **`OFFNAME`** *(string)*: The most "official" available name for the location (either current or historical). +- **`LANG`** *(string)*: The language of the name, using codes such as `RUS`, `CHU`, `HIM`, `MEM`, `ERZ`, `TAT`, `UNK`. +- **`CLASS`** *(string)*: Specifies the toponymic category, e.g., oikonym (settlement), hydronym (water body), dromonym (road), etc. +- **`TYPE`** *(string)*: Specifies the sub-category, e.g., village, city (for oikonyms), river, stream (for hydronyms), etc. +- **`DISTRICT`** *(string, nullable)*: Modern administrative district (e.g., to differentiate places with the same name). +- **`DOUBT`** *(int, nullable)*: Certainty level of assigned coordinates (empty = "I'm sure", 1 = "The coordinates are doubtful"). +- **`LANDMARK`** *(string, nullable)*: Relevant for microtoponyms; describes nearby visible features to help locate the object. +- **`COMMENTS`** *(text, nullable)*: Free-text notes for human workers. +- **`OTHER`** *(text, nullable)*: Reserved for future use. + +--- + +## **2. Linguistic Table** + +### **Purpose:** +The linguistic table stores different names (and their variants) of the same geographic object across different languages and sources, etymological information and other linguistic metadata. + +### **Columns:** +- **`ID`** *(int, PK)*: Unique identifier for each toponym entry, independent from the spatial table. +- **`SPATID`** *(int, FK → Spatial.ID)*: Links the toponym to its corresponding spatial object. +- **`DOUSPAT`** *(int, nullable)*: Certainty of name relation to the spatial object (empty = "I'm sure", 1 = "The spatial link is doubtful"). +- **`MAINID`** *(int, FK → Linguistic.ID)*: Represents the "main" name in a "nest" of interconnected toponyms (sometimes grouping is necessary, many sources give several somehow connected names for a single spatial object and we want to reflect this connections in database). +- **`TOPONYM`** *(string)*: The attested name as recorded from sources. +- **`DOUTOPO`** *(int, nullable)*: Certainty of the given orthographic form (empty = "I'm sure", 1 = "It is doubtful"). +- **`LANG`** *(string)*: The language of the toponym, following the predefined codes. +- **`DOULANG`** *(int, nullable)*: Certainty of the language of the toponym (empty = "I'm sure", 1 = "It is doubtful"). +- **`PRONUNC`** *(text, nullable)*: Reserved for pronunciation information (format yet to be decided). +- **`DOUPRON`** *(int, nullable)*: Certainty of the pronunciation information (empty = "I'm sure", 1 = "It is doubtful"). +- **`ETYM`** *(text, nullable)*: Etymological explanation of the name's origin. It is always doubtful. +- **`ORIGIN`** *(string, nullable)*: Language from which the name originated (e.g., `CHU`, `RUS`, `TAT`, `OTH`, `UNK`). This one is always doubtful too. +- **`COMMENTS`** *(text, nullable)*: Free-text notes. +- **`OTHER`** *(text, nullable)*: Reserved for future use. + +--- + +## **3. Temporal Table** + +### **Purpose:** +This table records historical changes in toponyms, tracking their usage over time. + +### **Columns:** +- **`ID`** *(int, PK)*: Unique identifier for each temporal record. +- **`LINGID`, `LINGNAME`** *(Composite FK → Linguistic (ID, TOPONYM))*: Together, these fields reference a specific toponym in the linguistic table. +- **`TOPFORMS`** *(text, nullable)*: Holds different attested spelling variations of the name from multiple sources. +- **`START`** *(int)*: Earliest recorded use of the name (source text goes to **`FULLTEXT`** column). +- **`DOUSTART`** *(int)*: Certainty level for the `START` date (empty = "I'm sure", 1 = "It is doubtful"). +- **`END`** *(int, nullable)*: The last recorded use of the name. If the object still exists, this field remains empty. The source text is stored in the **FULLTEXT** column. +- **`DOUEND`** *(int, nullable)*: Certainty level for the `END` date (empty = "I'm sure", 1 = "It is doubtful"). +- **`EVENT`** *(string)*: Categorization of historical change, with predefined values: + - `MERGEIN`: The object was absorbed into another. + - `ACTIVE`: The name is still in use. + - `RENAME`: The object was renamed. + - `CEASE`: The object ceased to exist. +- **`OBJID`** *(int, nullable)*, **`OBJNAME`** *(text, nullable)*: Context-dependent: + - **ACTIVE**: Both empty. + - **RENAME**: Stores the information about the **new** name of the object, it is a reference to Linguistic (ID, TOPONYM). + - **MERGEIN**: Stores the information about the **absorbing** object, again, it is a reference to Linguistic (ID, TOPONYM). + - **CEASE**: Both empty. +- **`COMMENTS`** *(text, nullable)*: Free-text notes. +- **`OTHER`** *(text, nullable)*: Reserved for future use. +- **`FULLTEXT`** *(text)*: Full textual information about the historical event, preserving source transparency. + +--- + +## **Relationships and Interactions** +- **`SPATID` (Linguistic) → `ID` (Spatial)**: Links toponyms to geographic locations. +- **`LINGID` (Temporal) → `ID` (Linguistic)** and **`LINGNAME` (Temporal) → `TOPONYM` (Linguistic)**: Tracks historical changes of a specific name. +- **`OBJID` (Temporal) → `ID` (Linguistic)** and **`OBJNAME` (Temporal) → `TOPONYM` (Linguistic)**: Records specifically name changes or mergers. + +--- + +## **Next Steps** +- Finalize the format for the `PRONUNC` field. + + ADDED data/csv/db_experiment/v0.2/db_structure.sql Index: data/csv/db_experiment/v0.2/db_structure.sql ================================================================== --- /dev/null +++ data/csv/db_experiment/v0.2/db_structure.sql @@ -0,0 +1,52 @@ +CREATE TABLE spatial ( + ID INT PRIMARY KEY, + LAT FLOAT NOT NULL, + LON FLOAT NOT NULL, + OFFNAME TEXT NOT NULL, + LANG VARCHAR(3) NOT NULL, + CLASS TEXT NOT NULL, + TYPE TEXT NOT NULL, + DISTRICT TEXT, + DOUBT INT CHECK (DOUBT IN (1)) NULL, + LANDMARK TEXT, + COMMENTS TEXT, + OTHER TEXT +); + +CREATE TABLE linguistic ( + ID INT PRIMARY KEY, + SPATID INT NOT NULL, + DOUSPAT INT CHECK (DOUSPAT IN (1)) NULL, + MAINID INT, + TOPONYM TEXT NOT NULL, + DOUTOPO INT CHECK (DOUTOPO IN (1)) NULL, + LANG VARCHAR(3) NOT NULL, + DOULANG INT CHECK (DOULANG IN (1)) NULL, + PRONUNC TEXT, + DOUPRON INT CHECK (DOUPRON IN (1)) NULL, + ETYM TEXT, -- Always doubtful + ORIGIN VARCHAR(3), -- Always doubtful + COMMENTS TEXT, + OTHER TEXT, + FOREIGN KEY (SPATID) REFERENCES spatial(ID), + FOREIGN KEY (MAINID) REFERENCES linguistic(ID) +); + +CREATE TABLE temporal ( + ID INT PRIMARY KEY, + LINGID INT NOT NULL, + LINGNAME TEXT NOT NULL, + TOPFORMS TEXT, + START INT NOT NULL, + DOUSTART INT CHECK (DOUSTART IN (1)) NULL, + END INT, + DOUEND INT CHECK (DOUEND IN (1)) NULL, + EVENT TEXT CHECK (EVENT IN ('MERGEIN', 'ACTIVE', 'RENAME', 'CEASE')), + OBJID INT, + OBJNAME TEXT, + FULLTEXT TEXT NOT NULL, + COMMENTS TEXT, + OTHER TEXT, + FOREIGN KEY (LINGID, LINGNAME) REFERENCES linguistic(ID, TOPONYM), + FOREIGN KEY (OBJID, OBJNAME) REFERENCES linguistic(ID, TOPONYM) +);