summaryrefslogblamecommitdiffstats
path: root/externals/lurlparser/LUrlParser.cpp
blob: 9c134e330383d68f054fa08122bdb68c15535695 (plain) (tree)








































































































































































































































































                                                                                                                              
/*
 * Lightweight URL & URI parser (RFC 1738, RFC 3986)
 * https://github.com/corporateshark/LUrlParser
 *
 * The MIT License (MIT)
 *
 * Copyright (C) 2015 Sergey Kosarevsky (sk@linderdaum.com)
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include "LUrlParser.h"

#include <algorithm>
#include <cstring>
#include <stdlib.h>

// check if the scheme name is valid
static bool IsSchemeValid( const std::string& SchemeName )
{
    for ( auto c : SchemeName  )
    {
        if ( !isalpha( c ) && c != '+' && c != '-' && c != '.' ) return false;
    }

    return true;
}

bool LUrlParser::clParseURL::GetPort( int* OutPort ) const
{
    if ( !IsValid() ) { return false; }

    int Port = atoi( m_Port.c_str() );

    if ( Port <= 0 || Port > 65535 ) { return false; }

    if ( OutPort ) { *OutPort = Port; }

    return true;
}

// based on RFC 1738 and RFC 3986
LUrlParser::clParseURL LUrlParser::clParseURL::ParseURL( const std::string& URL )
{
    LUrlParser::clParseURL Result;

    const char* CurrentString = URL.c_str();

    /*
     *	<scheme>:<scheme-specific-part>
     *	<scheme> := [a-z\+\-\.]+
     *	For resiliency, programs interpreting URLs should treat upper case letters as equivalent to lower case in scheme names
     */

    // try to read scheme
    {
        const char* LocalString = strchr( CurrentString, ':' );

        if ( !LocalString )
        {
            return clParseURL( LUrlParserError_NoUrlCharacter );
        }

        // save the scheme name
        Result.m_Scheme = std::string( CurrentString, LocalString - CurrentString );

        if ( !IsSchemeValid( Result.m_Scheme ) )
        {
            return clParseURL( LUrlParserError_InvalidSchemeName );
        }

        // scheme should be lowercase
        std::transform( Result.m_Scheme.begin(), Result.m_Scheme.end(), Result.m_Scheme.begin(), ::tolower );

        // skip ':'
        CurrentString = LocalString+1;
    }

    /*
     *	//<user>:<password>@<host>:<port>/<url-path>
     *	any ":", "@" and "/" must be normalized
     */

    // skip "//"
    if ( *CurrentString++ != '/' ) return clParseURL( LUrlParserError_NoDoubleSlash );
    if ( *CurrentString++ != '/' ) return clParseURL( LUrlParserError_NoDoubleSlash );

    // check if the user name and password are specified
    bool bHasUserName = false;

    const char* LocalString = CurrentString;

    while ( *LocalString )
    {
        if ( *LocalString == '@' )
        {
            // user name and password are specified
            bHasUserName = true;
            break;
        }
        else if ( *LocalString == '/' )
        {
            // end of <host>:<port> specification
            bHasUserName = false;
            break;
        }

        LocalString++;
    }

    // user name and password
    LocalString = CurrentString;

    if ( bHasUserName )
    {
        // read user name
        while ( *LocalString && *LocalString != ':' && *LocalString != '@' ) LocalString++;

        Result.m_UserName = std::string( CurrentString, LocalString - CurrentString );

        // proceed with the current pointer
        CurrentString = LocalString;

        if ( *CurrentString == ':' )
        {
            // skip ':'
            CurrentString++;

            // read password
            LocalString = CurrentString;

            while ( *LocalString && *LocalString != '@' ) LocalString++;

            Result.m_Password = std::string( CurrentString, LocalString - CurrentString );

            CurrentString = LocalString;
        }

        // skip '@'
        if ( *CurrentString != '@' )
        {
            return clParseURL( LUrlParserError_NoAtSign );
        }

        CurrentString++;
    }

    bool bHasBracket = ( *CurrentString == '[' );

    // go ahead, read the host name
    LocalString = CurrentString;

    while ( *LocalString )
    {
        if ( bHasBracket && *LocalString == ']' )
        {
            // end of IPv6 address
            LocalString++;
            break;
        }
        else if ( !bHasBracket && ( *LocalString == ':' || *LocalString == '/' ) )
        {
            // port number is specified
            break;
        }

        LocalString++;
    }

    Result.m_Host = std::string( CurrentString, LocalString - CurrentString );

    CurrentString = LocalString;

    // is port number specified?
    if ( *CurrentString == ':' )
    {
        CurrentString++;

        // read port number
        LocalString = CurrentString;

        while ( *LocalString && *LocalString != '/' ) LocalString++;

        Result.m_Port = std::string( CurrentString, LocalString - CurrentString );

        CurrentString = LocalString;
    }

    // end of string
    if ( !*CurrentString )
    {
        Result.m_ErrorCode = LUrlParserError_Ok;

        return Result;
    }

    // skip '/'
    if ( *CurrentString != '/' )
    {
        return clParseURL( LUrlParserError_NoSlash );
    }

    CurrentString++;

    // parse the path
    LocalString = CurrentString;

    while ( *LocalString && *LocalString != '#' && *LocalString != '?' ) LocalString++;

    Result.m_Path = std::string( CurrentString, LocalString - CurrentString );

    CurrentString = LocalString;

    // check for query
    if ( *CurrentString == '?' )
    {
        // skip '?'
        CurrentString++;

        // read query
        LocalString = CurrentString;

        while ( *LocalString && *LocalString != '#' ) LocalString++;

        Result.m_Query = std::string( CurrentString, LocalString - CurrentString );

        CurrentString = LocalString;
    }

    // check for fragment
    if ( *CurrentString == '#' )
    {
        // skip '#'
        CurrentString++;

        // read fragment
        LocalString = CurrentString;

        while ( *LocalString ) LocalString++;

        Result.m_Fragment = std::string( CurrentString, LocalString - CurrentString );

        CurrentString = LocalString;
    }

    Result.m_ErrorCode = LUrlParserError_Ok;

    return Result;
}