// dragoncode-decoder/js/parser.js

// Dragon Code V2.6 Parser

/**
 * Decodes a Dragon Code string into a structured result.
 *
 * The input is tokenized, each token is classified with identifyTagType and
 * then handed to parseSpecies or parseTag. Failures never propagate to the
 * caller; they are collected as messages in the `errors` array instead.
 *
 * @param {string} input - The raw Dragon Code text.
 * @returns {{species: (Object|null), tags: Object, errors: string[]}}
 *     `species` is the parsed species token (or null when none was found),
 *     `tags` maps a tag type name to its parsed value (a later token of the
 *     same type replaces an earlier one), `errors` lists every failure message.
 */
function parseDragonCode(input) {
    const decoded = { species: null, tags: {}, errors: [] };

    try {
        const tokens = tokenize(input);

        for (const [position, token] of tokens.entries()) {
            try {
                const kind = identifyTagType(token, position);

                // Unrecognised tokens are skipped without recording an error
                if (!kind) {
                    continue;
                }

                if (kind === 'species') {
                    decoded.species = parseSpecies(token);
                    continue;
                }

                const details = parseTag(token, kind);
                if (details) {
                    decoded.tags[kind] = details;
                }
            } catch (error) {
                // A bad token is reported but does not stop the remaining tokens
                decoded.errors.push(`Error parsing token "${token}": ${error.message}`);
            }
        }
    } catch (error) {
        decoded.errors.push(`Error tokenizing input: ${error.message}`);
    }

    return decoded;
}

/**
 * Tokenizer: splits a Dragon Code string into individual tokens.
 *
 * A leading "DC2." or "DC." prefix is removed. Tokens are separated by any
 * whitespace (spaces, tabs, line breaks), so codes wrapped over several lines
 * tokenize the same way as single-line codes. Whitespace inside double quotes
 * is kept, and the quote characters stay part of the token.
 *
 * @param {string} input - The raw Dragon Code text.
 * @returns {string[]} The non-empty tokens in input order.
 * @throws {TypeError} If input has no `trim` method (e.g. null or undefined).
 */
function tokenize(input) {
    // Remove DC2. / DC. prefix if present
    let code = input.trim();
    if (code.startsWith('DC2.')) {
        code = code.substring(4);
    } else if (code.startsWith('DC.')) {
        code = code.substring(3);
    }

    const tokens = [];
    let current = '';
    let inQuotes = false;

    // Pushes the token collected so far (if any) and starts a new one
    const flush = () => {
        const token = current.trim();
        if (token) {
            tokens.push(token);
        }
        current = '';
    };

    for (const char of code) {
        if (char === '"') {
            inQuotes = !inQuotes;
            current += char;
        } else if (!inQuotes && /\s/.test(char)) {
            // Any whitespace outside quotes ends the current token
            flush();
        } else {
            current += char;
        }
    }

    // The last token has no trailing separator; an unterminated quote is
    // emitted as-is rather than dropped
    flush();

    return tokens;
}

/**
 * Identifies the type of tag a token represents.
 *
 * Resolution order:
 *   1. Two-letter prefixes (Tc, Ac, Sk, Df and the appendage prefixes
 *      Ph/Pa/Pl/Pw/Pt/Pv/Pk/Pf/Pp) at every position, so they are never
 *      mistaken for the single-letter tags T, A, S, ...
 *   2. For tokens after the first: the single-letter tag for the first
 *      character, or null when there is none.
 *   3. For the first token (index 0), where a species is expected:
 *        - a bare "H" is the Human species, not an unmodified Hoard tag;
 *        - a compound species form (a species code directly followed by
 *          ^, /, + or [ and another capitalised code, e.g. "H^Dw") is a
 *          species even if its first letter is also a tag letter;
 *        - "C" counts as the color tag only when followed by a lowercase
 *          letter;
 *        - any other token starting with a tag letter is that tag;
 *        - everything else is the species.
 *
 * @param {string} token - A single token produced by the tokenizer.
 * @param {number} index - Position of the token in the token list.
 * @returns {string|null} 'species', a tag type name, or null if unrecognised.
 */
function identifyTagType(token, index) {
    // IMPORTANT: two-letter tags are checked before single-letter tags to
    // avoid conflicts (e.g., Tc before T, Ac before A, Sk before S)
    const twoLetterTags = new Map([
        ['Tc', 'technology'],
        ['Ac', 'activity'],
        ['Sk', 'skinType'],
        ['Df', 'dragonFriend']
    ]);
    const twoLetterType = twoLetterTags.get(token.substring(0, 2));
    if (twoLetterType) return twoLetterType;
    if (/^P[halwtvkfp]/.test(token)) return 'appendages';

    const singleLetterTags = new Map([
        ['G', 'gender'],
        ['L', 'length'],
        ['W', 'width'],
        ['T', 'weight'],
        ['C', 'color'],
        ['B', 'breath'],
        ['A', 'age'],
        ['N', 'nativeLand'],
        ['M', 'mating'],
        ['O', 'offspring'],
        ['H', 'hoard'],
        ['$', 'money'],
        ['F', 'diet'],
        ['R', 'reality'],
        ['J', 'humor'],
        ['S', 'social'],
        ['U', 'ubiquity'],
        ['I', 'irritability'],
        ['V', 'magic'],
        ['Q', 'psyPower'],
        ['E', 'emotion']
    ]);
    const singleLetterType = singleLetterTags.get(token.charAt(0)) || null;

    // Only the first token can be the species
    if (index !== 0) return singleLetterType;

    // "H" alone = Human (species), "H+++" = Hoard (tag)
    if (token === 'H') return 'species';

    // Compound species forms (shaped H^Dw, trapped H[Dw], cross A+D, Dw/H)
    // must be recognised before the single-letter tags; otherwise a leading
    // H, A, ... would turn them into hoard, age, ... tags. Tag modifiers are
    // never followed directly by a capitalised code (e.g. "H+++", "M+^2",
    // "O/"), so those tokens do not match here.
    if (/^[A-Z][a-z]*[\^\/+\[][A-Z]/.test(token)) return 'species';

    // At index 0 a "C" without a lowercase colour code is not a color tag
    if (singleLetterType === 'color' && !/^C[a-z]/.test(token)) return 'species';

    // Anything that is not a known tag in first position is the species
    return singleLetterType || 'species';
}

/**
 * Parses a species token (handles quoted names, ~, /, +, [], ^ and {}).
 *
 * The first matching form wins, in this order:
 *   - "name"            -> type 'custom', `value` is the text between quotes
 *   - contains ~ and /  -> type 'shapechanger', `forms` lists each form as
 *                          { species, modifier } ({...} gives the modifier)
 *   - contains +        -> type 'cross', `species` lists the crossed codes
 *                          (not when the token starts with + or -)
 *   - contains [ and ]  -> type 'trapped', with `trueForm` and `trappedIn`
 *   - contains ^        -> type 'shaped', with `trueForm` and `shapedAs`
 *   - otherwise         -> type 'simple', `value` is the token itself
 *
 * @param {string} token - The species token.
 * @returns {Object} Always has `type`, `value`, `modifiers` and `raw`; the
 *     form-specific fields listed above are added when they apply.
 */
function parseSpecies(token) {
    const species = {
        type: 'simple',
        value: null,
        modifiers: [],
        raw: token
    };
    const has = (needle) => token.includes(needle);

    if (token.startsWith('"') && token.endsWith('"')) {
        // Quoted custom species: keep only the text between the quotes
        species.type = 'custom';
        species.value = token.substring(1, token.length - 1);
    } else if (has('~') && has('/')) {
        // Shapechanger: ~species1/species2, each form may carry a {modifier}
        species.type = 'shapechanger';
        species.forms = token.split('/').map((piece) => {
            const form = piece.replace('~', '').trim();
            const braced = form.includes('{') ? /([^{]+)\{([^}]+)\}/.exec(form) : null;
            return braced
                ? { species: braced[1], modifier: braced[2] }
                : { species: form, modifier: null };
        });
    } else if (has('+') && !/^[+\-]/.test(token)) {
        // Cross-breed: species1+species2
        species.type = 'cross';
        species.species = token.split('+').map((code) => code.trim());
    } else if (has('[') && has(']')) {
        // Trapped form: species1[species2]
        species.type = 'trapped';
        const bracketed = /([^\[]+)\[([^\]]+)\]/.exec(token);
        if (bracketed) {
            species.trueForm = bracketed[1];
            species.trappedIn = bracketed[2];
        }
    } else if (has('^')) {
        // Shaped: species1^species2
        species.type = 'shaped';
        const [trueForm, shapedAs] = token.split('^');
        species.trueForm = trueForm;
        species.shapedAs = shapedAs;
    } else {
        // Simple species code
        species.value = token;
    }

    return species;
}

/**
 * Parses an individual tag token with the parser registered for its type.
 *
 * Types with their own syntax have a dedicated parser; the remaining known
 * types only carry modifier characters and share parseSimpleModifier.
 *
 * @param {string} token - The tag token.
 * @param {string} type - Tag type name as returned by identifyTagType.
 * @returns {Object} The parser's result, or `{ raw: token }` when no parser
 *     is registered for the type.
 */
function parseTag(token, type) {
    // Tags with dedicated syntax
    const dedicatedParsers = {
        gender: parseGender,
        length: parseLength,
        appendages: parseAppendages,
        skinType: parseSkinType,
        color: parseColor,
        breath: parseBreath,
        mating: parseMating,
        offspring: parseOffspring,
        diet: parseDiet,
        technology: parseTechnology
    };

    // Tags that consist of a prefix followed only by modifiers
    const modifierOnlyTypes = [
        'width',
        'weight',
        'age',
        'nativeLand',
        'hoard',
        'money',
        'reality',
        'activity',
        'humor',
        'social',
        'ubiquity',
        'irritability',
        'magic',
        'psyPower',
        'emotion',
        'dragonFriend'
    ];

    let handler = dedicatedParsers[type];
    if (!handler && modifierOnlyTypes.includes(type)) {
        handler = parseSimpleModifier;
    }

    return handler ? handler(token) : { raw: token };
}

/**
 * Parses a gender tag.
 *
 * A token containing a double quote is treated as a custom gender: the text
 * between the first pair of quotes becomes the value (the whole token is
 * used when no non-empty quoted text is found). Otherwise everything after
 * the leading 'G' is both the value and the source of the modifiers.
 *
 * @param {string} token - The gender token, e.g. one starting with 'G'.
 * @returns {Object} `{ type: 'custom', value, raw }` for quoted genders,
 *     `{ value, modifiers, raw }` otherwise.
 */
function parseGender(token) {
    if (!token.includes('"')) {
        // Standard gender: drop the 'G' prefix
        const body = token.substring(1);
        return {
            value: body,
            modifiers: extractModifiers(body),
            raw: token
        };
    }

    // Quoted custom gender
    const quoted = /"([^"]+)"/.exec(token);
    return {
        type: 'custom',
        value: quoted ? quoted[1] : token,
        raw: token
    };
}

/**
 * Parses a length tag, either quantitative (a number with an optional unit)
 * or qualitative (modifier characters only).
 *
 * @param {string} token - The length token.
 * @returns {{raw: string, value: (number|null), unit: (string|null),
 *     dimensions: string[], modifiers: (Object|null)}}
 *     For a quantitative token `value`/`unit` are filled, `dimensions` holds
 *     the letters of the first dimension suffix found (from a, l, n, t, w, h)
 *     and `modifiers` is null. For a qualitative token only `modifiers` is
 *     filled, from the text after the leading character.
 */
function parseLength(token) {
    const quantity = /L(\d+)([a-z]+)?/i.exec(token);

    if (!quantity) {
        // Qualitative length (L+++, L-, L, etc.)
        return {
            raw: token,
            value: null,
            unit: null,
            dimensions: [],
            modifiers: extractModifiers(token.substring(1))
        };
    }

    // Dimension suffix such as "4t": only the letters are kept, digits are dropped
    const dimensionPart = /\d+[a-z]*(\d*[alntwh])/.exec(token);
    const dimensions = dimensionPart
        ? Array.from(dimensionPart[1]).filter((symbol) => !/\d/.test(symbol))
        : [];

    return {
        raw: token,
        value: Number.parseInt(quantity[1], 10),
        unit: quantity[2] || null,
        dimensions,
        modifiers: null
    };
}

/**
 * Parses a tag that consists only of a prefix and modifiers (+, -, !, ?, ~, etc.).
 *
 * The prefix removed before scanning is one uppercase letter optionally
 * followed by one lowercase letter; tokens that do not start that way are
 * scanned in full.
 *
 * @param {string} token - The tag token.
 * @returns {{modifiers: Object, raw: string}} The extracted modifiers and the
 *     original token.
 */
function parseSimpleModifier(token) {
    const withoutPrefix = token.replace(/^[A-Z][a-z]?/, '');
    const modifiers = extractModifiers(withoutPrefix);

    return { modifiers, raw: token };
}

/**
 * Extracts modifier information from a string.
 *
 * '+' and '-' are counted; '!', '?', '~', '^' and '/' are recorded as
 * booleans. `real` / `virtual` are set when the string contains an 'r' / 'v'
 * anywhere. All other characters are ignored.
 *
 * @param {string} str - The text to scan (typically a token without its prefix).
 * @returns {{plus: number, minus: number, exclaim: boolean, question: boolean,
 *     tilde: boolean, caret: boolean, slash: boolean, real: boolean,
 *     virtual: boolean}}
 */
function extractModifiers(str) {
    const flags = {
        plus: 0,
        minus: 0,
        exclaim: false,
        question: false,
        tilde: false,
        caret: false,
        slash: false,
        real: false,
        virtual: false
    };

    for (const symbol of str) {
        switch (symbol) {
            case '+':
                flags.plus += 1;
                break;
            case '-':
                flags.minus += 1;
                break;
            case '!':
                flags.exclaim = true;
                break;
            case '?':
                flags.question = true;
                break;
            case '~':
                flags.tilde = true;
                break;
            case '^':
                flags.caret = true;
                break;
            case '/':
                flags.slash = true;
                break;
            default:
                break;
        }
    }

    // r/v (real/virtual) are detected anywhere in the string
    flags.real = str.includes('r');
    flags.virtual = str.includes('v');

    return flags;
}

/**
 * Parses an appendages tag (a sequence of appendage parts).
 *
 * Every 'P' is skipped. Each of the characters h, a, l, v, w, t, k, f, p
 * or ' starts a new part; any other character is appended to the part
 * currently being collected. Each collected part is parsed with
 * parseAppendagePart.
 *
 * @param {string} token - The appendages token.
 * @returns {{parts: Object[], raw: string}} The parsed parts in order and the
 *     original token.
 */
function parseAppendages(token) {
    const parts = [];
    let pending = '';

    for (const symbol of token) {
        // Skip prefix
        if (symbol === 'P') {
            continue;
        }

        if (/[halvwtkfp']/.test(symbol)) {
            // A type character closes the part collected so far
            if (pending) {
                parts.push(parseAppendagePart(pending));
            }
            pending = symbol;
        } else {
            pending += symbol;
        }
    }

    // Emit the final part, which has no following type character
    if (pending) {
        parts.push(parseAppendagePart(pending));
    }

    return { parts, raw: token };
}

/**
 * Parses one appendage part: a type character followed by its modifiers.
 *
 * @param {string} part - The part text; its first character is the type.
 * @returns {{type: string, modifiers: Object, count: (number|null)}}
 *     `count` is the first run of digits after the type character, or null
 *     when there are no digits.
 */
function parseAppendagePart(part) {
    const suffix = part.substring(1);
    const modifiers = extractModifiers(suffix);
    const digits = /\d+/.exec(suffix);

    return {
        type: part[0],
        modifiers,
        count: digits ? Number.parseInt(digits[0], 10) : null
    };
}

/**
 * Parses a skin type tag.
 *
 * After the two-character prefix, the first '?' (if any) is removed and
 * recorded as `modifiers.question`. The remainder is split on commas: the
 * first character of the first segment is the main type, and every later
 * segment of at least two characters is a body-part override whose first
 * character is the body part and whose rest is the skin type.
 *
 * @param {string} token - The skin type token (prefix included).
 * @returns {{mainType: (string|null), bodyPartTypes: {part: string, type: string}[],
 *     modifiers: Object, raw: string}}
 */
function parseSkinType(token) {
    // Remove Sk prefix
    const rawContent = token.substring(2);

    const hasQuestion = rawContent.includes('?');
    const content = hasQuestion ? rawContent.replace('?', '') : rawContent;

    const [main, ...overrides] = content.split(',');

    return {
        mainType: main ? main[0] : null,
        // Segments shorter than two characters cannot hold part + type and are dropped
        bodyPartTypes: overrides
            .filter((entry) => entry.length >= 2)
            .map((entry) => ({ part: entry[0], type: entry.substring(1) })),
        modifiers: hasQuestion ? { question: true } : {},
        raw: token
    };
}

/**
 * Parses a color tag into its individual colors.
 *
 * The text after the one-character prefix is split on '/' and '\'; every
 * non-empty segment is parsed with parseColorPart.
 *
 * @param {string} token - The color token (prefix included).
 * @returns {{colors: Object[], raw: string}} The parsed colors in order and
 *     the original token.
 */
function parseColor(token) {
    // Remove C prefix, then split into color segments
    const segments = token.substring(1).split(/[\/\\]/);

    return {
        colors: segments
            .filter(Boolean)
            .map((segment) => parseColorPart(segment)),
        raw: token
    };
}

/**
 * Parses one color segment of a color tag.
 *
 * The segment starts with a 2-3 lowercase letter base color, optionally
 * followed by the 'lum' (luminescent) marker, intensity characters
 * (+ - ^ _ ' % !), pattern characters (| = : * @ # & >) and body-part
 * suffixes of the form ",x".
 *
 * The base color stops before a directly following 'lum', so "grlum" is read
 * as base "gr" plus the luminescent marker rather than base "grl".
 *
 * @param {string} part - The color segment text.
 * @returns {{base: (string|null), intensity: string[], patterns: string[],
 *     bodyParts: string[], modifiers: Array}}
 *     `intensity` contains 'lum' first when present. A numbered pattern such
 *     as "&2" is recorded both as '&' and as '&2'. `modifiers` is always
 *     returned empty by this function.
 */
function parseColorPart(part) {
    const color = {
        base: null,
        intensity: [],
        patterns: [],
        bodyParts: [],
        modifiers: []
    };
    let rest = part;

    // Extract base color (2-3 letter code). Prefer the shortest code that is
    // followed by 'lum', a non-letter or the end of the segment, so the base
    // never swallows the leading 'l' of 'lum'; otherwise fall back to the
    // plain greedy match.
    const baseMatch = /^[a-z]{2,3}?(?=lum|[^a-z]|$)/.exec(rest) || /^[a-z]{2,3}/.exec(rest);
    if (baseMatch) {
        color.base = baseMatch[0];
        rest = rest.substring(baseMatch[0].length);
    }

    // Check for 'lum' (luminescent) special modifier
    if (rest.includes('lum')) {
        color.intensity.push('lum');
        rest = rest.replace('lum', '');
    }

    // Collect intensity and pattern characters up to the first body-part suffix
    const intensityChars = ['+', '-', '^', '_', "'", '%', '!'];
    const patternChars = ['|', '=', ':', '*', '@', '#', '&', '>'];
    for (const char of rest) {
        if (intensityChars.includes(char)) {
            color.intensity.push(char);
        } else if (patternChars.includes(char)) {
            color.patterns.push(char);
        } else if (char === ',') {
            // Body part modifier follows
            break;
        }
    }

    // Extract body part modifiers (,a, ,b, etc.)
    const bodyPartMatches = rest.match(/,([a-z])/g);
    if (bodyPartMatches) {
        color.bodyParts = bodyPartMatches.map((m) => m.substring(1));
    }

    // Check for special patterns like &1, &2
    const specialPattern = rest.match(/&(\d+)/);
    if (specialPattern) {
        color.patterns.push('&' + specialPattern[1]);
    }

    return color;
}

/**
 * Parses a breath weapon tag.
 *
 * Three shapes are produced:
 *   - quoted custom breath: `{ type: 'custom', value, raw }`, where value is
 *     the text between the first pair of quotes (or the whole token when no
 *     non-empty quoted text is found);
 *   - no content, or only the characters + - ! ? ~ after the prefix:
 *     `types` stays empty and `simple` holds the extracted modifiers;
 *   - otherwise `types` lists the '/'-separated breath types with '|' and
 *     '#' removed.
 * In the last two shapes `modifiers` contains 'beam' when the content has a
 * '|' and 'cloud' when it has a '#'.
 *
 * @param {string} token - The breath token (prefix included).
 * @returns {Object} One of the shapes described above.
 */
function parseBreath(token) {
    // Handle quoted custom breath
    if (token.includes('"')) {
        const quoted = /"([^"]+)"/.exec(token);
        return {
            type: 'custom',
            value: quoted ? quoted[1] : token,
            raw: token
        };
    }

    // Remove B prefix
    const content = token.substring(1);

    const shapeMarkers = [
        ['|', 'beam'],
        ['#', 'cloud']
    ];
    const breath = {
        types: [],
        modifiers: shapeMarkers
            .filter(([marker]) => content.includes(marker))
            .map(([, label]) => label),
        raw: token
    };

    // Bare "B" or simple +/- style modifiers: no specific breath type
    if (content.length === 0 || /^[+\-!?~]+$/.test(content)) {
        breath.simple = extractModifiers(content);
        return breath;
    }

    // Extract breath types (fl, ac, ic, etc.)
    breath.types = content
        .split('/')
        .map((entry) => entry.replace(/[|#]/g, '').trim())
        .filter(Boolean);

    return breath;
}

/**
 * Parses a mating tag.
 *
 * @param {string} token - The mating token (prefix included).
 * @returns {{modifiers: Object, count: (number|null),
 *     separations: (number|null), raw: string}}
 *     `modifiers` comes from the text after the first character, `count` is
 *     the number the token ends with, and `separations` is the number
 *     directly after the first "^" that is followed by digits; either is
 *     null when absent.
 */
function parseMating(token) {
    const modifiers = extractModifiers(token.substring(1));

    // Count: digits at the very end of the token
    const trailingDigits = /(\d+)$/.exec(token);
    // Separations: digits directly after a caret
    const caretDigits = /\^(\d+)/.exec(token);

    return {
        modifiers,
        count: trailingDigits ? Number.parseInt(trailingDigits[1], 10) : null,
        separations: caretDigits ? Number.parseInt(caretDigits[1], 10) : null,
        raw: token
    };
}

/**
 * Parses an offspring tag.
 *
 * @param {string} token - The offspring token (prefix included).
 * @returns {{modifiers: Object, count: (number|null), adopted: boolean,
 *     raw: string}}
 *     `modifiers` comes from the text after the first character, `count` is
 *     the first run of digits in the token (null when there is none) and
 *     `adopted` is true when the token contains an 'a'.
 */
function parseOffspring(token) {
    const modifiers = extractModifiers(token.substring(1));
    const firstDigits = /(\d+)/.exec(token);

    return {
        modifiers,
        count: firstDigits ? Number.parseInt(firstDigits[1], 10) : null,
        // 'a' marks adopted offspring
        adopted: token.includes('a'),
        raw: token
    };
}

/**
 * Parses a diet tag.
 *
 * @param {string} token - The diet token (prefix included).
 * @returns {{modifiers: Object, types: string[], raw: string}}
 *     `modifiers` comes from the text after the first character; `types`
 *     lists, in order of appearance, every c, h, o or v found in that text.
 */
function parseDiet(token) {
    const content = token.substring(1);
    const dietLetters = ['c', 'h', 'o', 'v'];

    return {
        modifiers: extractModifiers(content),
        types: Array.from(content).filter((letter) => dietLetters.includes(letter)),
        raw: token
    };
}

/**
 * Parses a technology tag.
 *
 * The tag may carry a free-text specialist field in square brackets. That
 * text is not part of the modifiers, so bracketed groups are removed before
 * the modifiers are extracted; otherwise characters inside the field (for
 * example the pluses in "[C++]" or the letters r/v in "[server]") would be
 * counted as modifiers.
 *
 * @param {string} token - The technology token (two-character prefix included).
 * @returns {{modifiers: Object, specialist: (string|null), raw: string}}
 *     `specialist` is the text of the first non-empty bracketed group, or
 *     null when there is none.
 */
function parseTechnology(token) {
    // Drop the Tc prefix and any complete [..] group before scanning modifiers
    const modifierText = token.substring(2).replace(/\[[^\]]*\]/g, '');

    const result = {
        modifiers: extractModifiers(modifierText),
        specialist: null,
        raw: token
    };

    // Extract specialist field in []
    const specialistMatch = token.match(/\[([^\]]+)\]/);
    if (specialistMatch) {
        result.specialist = specialistMatch[1];
    }

    return result;
}

// Export for Node.js (CommonJS).
// The guard skips the assignment when `module` is not defined or has no
// truthy `exports`, so the file can also be loaded where CommonJS is not
// available. Only parseDragonCode is exported; the other functions in this
// file are not part of the exported object.
if (typeof module !== 'undefined' && module.exports) {
    module.exports = { parseDragonCode };
}