Click here to Skip to main content
15,868,016 members
Please Sign up or sign in to vote.
5.00/5 (1 vote)
See more:
Yes, the classic LCS problem. Given two or more strings find the longest common substring in each string.

One caveat: No C#, C, C++, Javascript or any C-based programming language[^], nor any VB-based language (VB, VB.NET, VBScript). That cuts out most of the easy languages. Spread your wings!

Your solution should take into account null strings and strings whose length is limited only by available storage.

What I have tried:

Melatonin for jetlag but it doesn't work.

Graeme_Grant again gets the guernsey for his outstanding and, I'm assuming, time consuming treatment of last week's problem. Noice.
Posted
Updated 5-Feb-17 16:41pm
Comments
Graeme_Grant 27-Jan-17 10:36am    
Thanks Chris. But not really ... tinkering while watching a movie...

So looking at your Link, R is out but F# & PowerShell are okay then ;)
Patrice T 27-Jan-17 15:02pm    
What is supposed to be the result ? Just the length ? The substring ?
Graeme_Grant 27-Jan-17 15:53pm    
"find the longest common substring" .. my guess is the common substring...
Chris Maunder 27-Jan-17 16:58pm    
Yes.
Jon McKee 27-Jan-17 16:43pm    
Perl is considered C-based but F# isn't? That's a little odd tbh. If anything Perl has less in common with C than F#. Also that link includes B which was the precursor to C which I find baffling.

As if F# was not as far away from normal for me (second attempt ever), here is a FreePascal version for PIEBALDConsultant. This is my first attempt at programming with this language, so please be gentle as I am sure there is plenty of room for improvement...

Delphi
Program LongestSubstringContained(output);
type
    dynStrings = array of string;

procedure QuickSort(input : dynStrings; lowPos, highPos : integer);
var
  movablePointer, immovablePointer, temporaryPointer : integer;
  temporaryItem : string;

begin
  immovablePointer := lowPos; 
  movablePointer := highPos;

  while (movablePointer <> immovablePointer) do
  begin
    if(movablePointer > immovablePointer) then
    begin
      if(input[movablePointer] < input[immovablePointer]) then
      begin
        temporaryItem := input[movablePointer];
        input[movablePointer] := input[immovablePointer];
        input[immovablePointer] := temporaryItem;

        temporaryPointer := movablePointer; 
        movablePointer := immovablePointer;
        immovablePointer := temporaryPointer;
      end
      else
      begin
        dec(movablePointer); 
      end;
    end
    else
    begin
      if(input[movablePointer] > input[immovablePointer]) then
      begin
        temporaryItem := input[movablePointer];
        input[movablePointer] := input[immovablePointer];
        input[immovablePointer] := temporaryItem;

        temporaryPointer := movablePointer;
        movablePointer := immovablePointer;
        immovablePointer := temporaryPointer;
      end
      else
      begin
        inc(movablePointer);
      end;
    end;
  end;

  if(movablePointer > lowPos) then
    QuickSort(input,lowPos,movablePointer-1);

  if(movablePointer < highPos) then
    QuickSort(input,movablePointer+1,highPos);
end;

//remove blank array entries...
function TrimArray(input: dynStrings) : dynStrings;
var
    i, len, count: integer;
    ret: dynStrings;

begin
    count := 0;
    len := Length(input);
    for i := 0 to len do
    begin
        if (Length(input[i]) > 0) then
        begin
            count := count + 1;
            setlength(ret, count);
            ret[count - 1] := input[i];
        end;
    end;
    TrimArray := ret;
end;

function RetrieveBestResult(strings :dynStrings) : string;
var
    str, tmp: string;
    strLen, strCount, i, len, tmpCount: integer;

begin
    tmpCount := 0;
    strCount := 0;
    strLen := 0;
    tmp := '';
    str := '';

    // order the array
    QuickSort(strings, low(strings), high(strings));
    // find the most popular longest string
    for i := 0 to high(strings) do
    begin
        len := length(strings[i]);
        if (len >= strLen) then
        begin
            strCount := 1;
            str := strings[i];
            strLen := len;
        end
        else if (str = strings[i]) then
            strCount := strCount + 1

        else if (tmp = strings[i]) then
            tmpCount := tmpCount + 1
        else
        begin
            tmp := strings[i];
            tmpCount := 1;
        end;
    end;
    RetrieveBestResult := str;
end;

// check for a match
function StrInArray(value : string; strings :dynStrings) : Boolean;
var
    str : String;

begin
    for str in strings do
    begin
        if length(value) = 0 then
            exit(false);
       //if (lowercase(value) = lowercase(str)) then
       if (value = str) then
        begin
            exit(true);
        end;
    end;
  StrInArray := false;
end;

function Process(input: dynStrings) : string;
var
    matches: dynStrings;
    allMatches: dynStrings;
    i, c, cc, count, len: integer;
    str, sstr: string;

begin
    setlength(allMatches, 0);
    setlength(matches, 0);
    for i := 0 to high(input) do
    begin
        str := input[i];
        count := 0;
        len := length(str);
        for c := 0 to len do
        begin
            for cc := 1 to len - c do
            begin
                sstr := copy(str, c, cc);
                if (high(allmatches) = -1) or (StrInArray(sstr, allMatches)) then
                begin
                    count := count + 1;
                    setlength(matches, count);
                    matches[count - 1] := sstr;
                end;
            end;
        end;
        // bounce early if no matches
        if (high(matches) < 1) then
            exit('no match');
        allMatches := copy(matches, 0, length(matches));
        writeln('Matches found: ', high(allMatches));
        setlength(matches, 0);
    end;
    Process := RetrieveBestResult(allMAtches);
end;

function GetLCS(input: dynStrings) : string;
var
    count: integer;
    work: dynStrings;

begin
    // check and clean
    count := Length(input);
    if (count = 0) then
        exit('empty')
    else
    begin
        work := TrimArray(input);
        count := Length(work);
        if (count = 0) then
            exit('empty');
    end;
    // process if work to be done...
    writeln('processing...');
    GetLCS := Process(work);
end;

var
    tests: array[0..4] of string;
    result: string;

begin
    tests[0] := 'Yes, the classic LCS problem. Given two or more strings find the longest common substring in each string.';
    tests[1] := '';
    tests[2] := 'One caveat: No C#, C, C++, Javascript or any C-based programming language[^], nor any VB-based language (VB, VB.NET, VBScript). That cuts out most of the easy languages. Spread your wings!';
    tests[3] := '';
    tests[4] := 'Your solution should take into account null strings and strings whose length is limited only by available storage.';

    result := GetLCS(tests);
    writeln('Longest Substring: ', result);
end.

And here is the output...
sh-4.3$ fpc -vw main.pas                                                                                                                                              
Free Pascal Compiler version 2.6.4 [2015/06/17] for x86_64                                                                                                            
Copyright (c) 1993-2014 by Florian Klaempfl and others                                                                                                                
Target OS: Linux for x86-64                                                                                                                                           
Compiling main.pas                                                                                                                                                    
Linking main                                                                                                                                                                                                                                                                                                                                
/usr/bin/ld: warning: link.res contains output sections; did you forget -T?                                                                                           209 lines compiled, 0.1 sec                                                                                                                                           
sh-4.3$ main                                                                                                                                                          
processing...                                                                                                                                                         
Matches found: 5564                                                                                                                                                   
Matches found: 253                                                                                                                                                    
Matches found: 147                                                                                                                                                    
Longest Substring: ings                                                                                                                                               
sh-4.3$

This solution was built using the Coding Ground[^] online IDE. Here is a link to the solution[^] where you can compile and run it.

Enjoy! :)
 
Share this answer
 
v2
Comments
Bryian Tan 28-Jan-17 18:51pm    
Two solution from Graeme_Grant!!! Look like you are the only participant !!!! :) anyway congrats on your previous two competitions. You done a great job. I think you getting this one too.
Graeme_Grant 28-Jan-17 18:53pm    
Thanks Bryian!

The second one was only after a chat with PIEBALDConsultant... Never done Pascal before so I thought that I'd try it.

Jump in and have a go! [edit:] I've learned 3 new languages doing these challenges! :)
Bryian Tan 28-Jan-17 18:59pm    
Good job anyway.
Okay, as F# is acceptable, here is an F# version that will accept multiple inputs to be compared and return the "Longest common substring". I am using the challenge's text (including nulls) as the input for testing... ;)

F#
open System
open System.Collections.Generic
open System.Linq

let msg = "Yes, the classic LCS problem. Given two or more strings find the longest common substring in each string.\n\
           \n\
           One caveat: No C#, C, C++, Javascript or any C-based programming language[^], nor any VB-based language (VB, VB.NET, VBScript). That cuts out most of the easy languages. Spread your wings!\n\
           \n\
           Your solution should take into account null strings and strings whose length is limited only by available storage."

let tests = msg.Split [|'\n'|]

let GetSubstrings (s : string)  = 
    [ for c1 in 0 .. s.Length - 1 do
        for c2 in c1 .. s.Length - 1 do
                yield s.[c1..c2]
        ]

let addRange input (targetSet : ICollection<_>) =
        input |> Seq.iter(fun x -> targetSet.Add(x))

let GetLongestCommonSubstring (strings : string[]) =
    match (Seq.toList (strings.Where( fun x -> not(x.Equals(""))))) with
    | [] -> "empty"
    | x::xs -> 
        let subStrings = new HashSet<string>()
        addRange (GetSubstrings(x)) subStrings
        xs |> List.map(fun y -> 
            let yy = new HashSet<string>() 
            addRange (GetSubstrings(y)) yy 
            subStrings.IntersectWith(yy)) |> ignore
        subStrings.OrderByDescending(fun s -> s.Length).First()
        
[<EntryPoint>]
let main argv = 
    printfn "-----------------------------------------------------------------------\r\n"
    printfn "Input strings (including nulls / blank lines):\r\n"
    printfn "%A\r\n" msg
    printfn "-----------------------------------------------------------------------\r\n"
    printfn "** Longest common substring found is: %A\r\n" (GetLongestCommonSubstring(tests))
    printfn "\r\n-- Press any key to exit --";
    Console.ReadKey() |> ignore;
    0

... and the output:
-----------------------------------------------------------------------

Input strings (including nulls / blank lines):

"Yes, the classic LCS problem. Given two or more strings find the longest common substring in each string.

One caveat: No C#, C, C++, Javascript or any C-based programming language[^], nor any VB-based language (VB, VB.NET, VBScript). That cuts out most of the easy languages. Spread your wings!

Your solution should take into account null strings and strings whose length is limited only by available storage."

-----------------------------------------------------------------------

** Longest common substring found is: "ings"


-- Press any key to exit --
 
Share this answer
 
Comments
Patrice T 27-Jan-17 17:37pm    
May be I am tired, but in output, What is first string and what is second ?
Graeme_Grant 27-Jan-17 17:40pm    
My input is in the quotes "Yes, the classic ... available storage." - that's 5 strings, 2 are nulls. The output LCS after running the code over the input is "ings"
Patrice T 27-Jan-17 17:47pm    
Ok I see.
A couple of " around each string would have been clearer to me.
Graeme_Grant 27-Jan-17 17:53pm    
We were not given anything to work with, so yes, it was a block of text, not just a bunch of strings in an array. The challenge's own text appeared to be the best candidate. This allows for any number of strings to be used, not just two (2). The first thing I do is convert it into a string array.
Here's my C# implementation, just as a counter-example.

The PIEBALD.Type.Substring class is similar to the StringSlice class I used for the Bad Word Filter challenge, but with more bells and whistles.

C#
public System.Collections.Generic.IList<string>
LCS
(
  params string[] Subject
)
{
  System.Collections.Generic.List<string> result = 
    new System.Collections.Generic.List<string>() ;

  System.Collections.Generic.Dictionary<PIEBALD.Type.Substring,System.Collections.Generic.HashSet<int>> temp =
    new System.Collections.Generic.Dictionary<PIEBALD.Type.Substring,System.Collections.Generic.HashSet<int>>() ;

  int shortest = System.Int32.MaxValue ;

  /* Find the shortest input length */
  for ( int i = 0 ; i < Subject.Length ; i++ )
  {
    if ( shortest > Subject [ i ].Length )
    {
      shortest = Subject [ i ].Length ;
    }
  }

  int max = 0 ;

  for ( ; ( max < Subject.Length ) && ( shortest > 0 ) ; shortest-- )
  {
    max = 0 ;

    temp.Clear() ;

    foreach 
    ( 
      PIEBALD.Type.Substring s 
    in 
      PIEBALD.Type.Substring.EnumSubstrings ( Subject [ 0 ] , shortest ) 
    )
    {
      temp [ s ] = new System.Collections.Generic.HashSet<int>() ;

      temp [ s ].Add ( 0 ) ;
    }

    for ( int i = 1 ; i < Subject.Length ; i++ )
    {
      foreach 
      ( 
        PIEBALD.Type.Substring s 
      in 
        PIEBALD.Type.Substring.EnumSubstrings ( Subject [ i ] , shortest ) 
      )
      {
        System.Collections.Generic.HashSet<int> v ;
                
        if ( temp.TryGetValue ( s , out v ) )
        {
          v.Add ( i ) ;

          if ( max < v.Count )
          {
            max = v.Count ;
          }
        }
      }

      if ( max < i )
      {
        break ;
      }
      else
      {
        // Remove dead entries maybe
      }
    }
  }

  foreach
  (
    System.Collections.Generic.KeyValuePair<PIEBALD.Type.Substring,System.Collections.Generic.HashSet<int>> kvp
  in 
    temp
  )
  {
    if ( kvp.Value.Count == Subject.Length )
    {
      result.Add ( kvp.Key.ToString() ) ;
    }
  }

  return ( result.AsReadOnly() ) ;
}
 
Share this answer
 
v2

This content, along with any associated source code and files, is licensed under The Code Project Open License (CPOL)



CodeProject, 20 Bay Street, 11th Floor Toronto, Ontario, Canada M5J 2N8 +1 (416) 849-8900